github.com/slspeek/camlistore_namedsearch@v0.0.0-20140519202248-ed6f70f7721a/pkg/index/keys.go (about)

     1  /*
     2  Copyright 2011 Google Inc.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8       http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package index
    18  
    19  import (
    20  	"bytes"
    21  	"fmt"
    22  	"strings"
    23  )
    24  
// requiredSchemaVersion is the version of the index key schema defined in
// this file. It is incremented every time an index key type is added,
// changed, or removed.
//
// Version history:
//   - Version 4: EXIF tags + GPS
const requiredSchemaVersion = 4
    29  
    30  // type of key returns the identifier in k before the first ":" or "|".
    31  // (Originally we packed keys by hand and there are a mix of styles)
    32  func typeOfKey(k string) string {
    33  	c := strings.Index(k, ":")
    34  	p := strings.Index(k, "|")
    35  	if c < 0 && p < 0 {
    36  		return ""
    37  	}
    38  	if c < 0 {
    39  		return k[:p]
    40  	}
    41  	if p < 0 {
    42  		return k[:c]
    43  	}
    44  	min := c
    45  	if p < min {
    46  		min = p
    47  	}
    48  	return k[:min]
    49  }
    50  
// keyType describes one kind of index row: the name that starts each of its
// keys, and the ordered, typed parts that make up its key and its value.
//
// The fields are initialized positionally by the key definitions in this
// file, so their order must not change.
type keyType struct {
	// name is written at the start of every key (and key prefix) of this type.
	name string
	// keyParts are the parts that follow name in a key; nil when the key is
	// just the name.
	keyParts []part
	// valParts are the parts of the row's value; nil when no value parts are
	// defined.
	valParts []part
}
    56  
// Prefix returns a key prefix for rows of type k: the name, then each of args
// encoded according to the corresponding key part and preceded by "|", then a
// trailing "|". Fewer args than key parts may be given (including none); it
// panics if more are given or if an argument has an unsuitable type.
func (k *keyType) Prefix(args ...interface{}) string {
	return k.build(true, true, k.keyParts, args...)
}
    60  
// Key returns the full key for a row of type k: the name followed by each of
// args encoded according to the corresponding key part, each preceded by "|".
// It panics unless exactly one argument is given per key part, or if an
// argument has an unsuitable type.
func (k *keyType) Key(args ...interface{}) string {
	return k.build(false, true, k.keyParts, args...)
}
    64  
// Val returns the value for a row of type k: each of args encoded according
// to the corresponding value part and joined with "|". Unlike Key, the result
// does not include the name. It panics unless exactly one argument is given
// per value part, or if an argument has an unsuitable type.
func (k *keyType) Val(args ...interface{}) string {
	return k.build(false, false, k.valParts, args...)
}
    68  
    69  func (k *keyType) build(isPrefix, isKey bool, parts []part, args ...interface{}) string {
    70  	var buf bytes.Buffer
    71  	if isKey {
    72  		buf.WriteString(k.name)
    73  	}
    74  	if !isPrefix && len(args) != len(parts) {
    75  		panic("wrong number of arguments")
    76  	}
    77  	if len(args) > len(parts) {
    78  		panic("too many arguments")
    79  	}
    80  	for i, arg := range args {
    81  		if isKey || i > 0 {
    82  			buf.WriteString("|")
    83  		}
    84  		asStr := func() string {
    85  			s, ok := arg.(string)
    86  			if !ok {
    87  				s = arg.(fmt.Stringer).String()
    88  			}
    89  			return s
    90  		}
    91  		switch parts[i].typ {
    92  		case typeIntStr:
    93  			switch arg.(type) {
    94  			case int, int64, uint64:
    95  				buf.WriteString(fmt.Sprintf("%d", arg))
    96  			default:
    97  				panic("bogus int type")
    98  			}
    99  		case typeStr:
   100  			buf.WriteString(urle(asStr()))
   101  		case typeRawStr:
   102  			buf.WriteString(asStr())
   103  		case typeReverseTime:
   104  			s := asStr()
   105  			const example = "2011-01-23T05:23:12"
   106  			if len(s) < len(example) || s[4] != '-' && s[10] != 'T' {
   107  				panic("doesn't look like a time: " + s)
   108  			}
   109  			buf.WriteString(reverseTimeString(s))
   110  		default:
   111  			if s, ok := arg.(string); ok {
   112  				buf.WriteString(s)
   113  			} else {
   114  				buf.WriteString(arg.(fmt.Stringer).String())
   115  			}
   116  		}
   117  	}
   118  	if isPrefix {
   119  		buf.WriteString("|")
   120  	}
   121  	return buf.String()
   122  }
   123  
// part is one named, typed component of an index key or value.
//
// The fields are initialized positionally by the key definitions in this
// file, so their order must not change.
type part struct {
	// name labels the part; build does not consult it.
	name string
	// typ selects how build encodes the argument supplied for this part.
	typ partType
}
   128  
// partType identifies the kind of data held by a part, which determines how
// build encodes the corresponding argument.
type partType int

// The part types. build has dedicated handling for typeReverseTime, typeStr,
// typeIntStr, and typeRawStr; arguments for the remaining types are written
// as-is (as a string, or via their String method).
const (
	typeKeyId partType = iota // PGP key id
	typeTime
	typeReverseTime // time prepended with "rt" + each numeric digit reversed from '9'
	typeBlobRef
	typeStr    // URL-escaped
	typeIntStr // integer as string
	typeRawStr // not URL-escaped
)
   140  
// The index row types. Each keyType literal lists, in order, the name that
// prefixes the key, the parts of the key, and the parts of the value.
var (
	// keySchemaVersion indexes the index schema version.
	// Its key is the bare name (there are no key parts) and its value is the
	// version number.
	keySchemaVersion = &keyType{
		"schemaversion",
		nil,
		[]part{
			{"version", typeIntStr},
		},
	}

	// keyMissing rows are keyed by a "have" blobref and a "needed" blobref;
	// the value is a single part named "1".
	// NOTE(review): presumably this records that blob "have" depends on a
	// blob "needed" that is not available yet — confirm against the callers.
	keyMissing = &keyType{
		"missing",
		[]part{
			{"have", typeBlobRef},
			{"needed", typeBlobRef},
		},
		[]part{
			{"1", typeStr},
		},
	}

	// keyPermanodeClaim indexes when a permanode is modified (or deleted) by a claim.
	// It ties the affected permanode to the date of the modification, the responsible
	// claim, and the nature of the modification.
	keyPermanodeClaim = &keyType{
		"claim",
		[]part{
			{"permanode", typeBlobRef}, // modified permanode
			{"signer", typeKeyId},
			{"claimDate", typeTime},
			{"claim", typeBlobRef},
		},
		[]part{
			{"claimType", typeStr},
			{"attr", typeStr},
			{"value", typeStr},
			// And the signerRef, which seems redundant
			// with the signer keyId in the key, but the
			// Claim struct needs this, and there's 1:m
			// for keyId:blobRef, so:
			{"signerRef", typeBlobRef},
		},
	}

	// keyRecentPermanode rows are keyed by the owner's key id, a reversed
	// modification time, and a claim blobref. No value parts are defined.
	// NOTE(review): the reversed time presumably makes the most recently
	// modified entries sort first — confirm against reverseTimeString.
	keyRecentPermanode = &keyType{
		"recpn",
		[]part{
			{"owner", typeKeyId},
			{"modtime", typeReverseTime},
			{"claim", typeBlobRef},
		},
		nil,
	}

	// keyPathBackward rows are keyed by signer and target (plus the claim,
	// for uniqueness); the value holds the claim date, the base blobref,
	// whether the path is active, and the path suffix.
	keyPathBackward = &keyType{
		"signertargetpath",
		[]part{
			{"signer", typeKeyId},
			{"target", typeBlobRef},
			{"claim", typeBlobRef}, // for key uniqueness
		},
		[]part{
			{"claimDate", typeTime},
			{"base", typeBlobRef},
			{"active", typeStr}, // 'Y', or 'N' for deleted
			{"suffix", typeStr},
		},
	}

	// keyPathForward rows are keyed by signer, base, suffix, and a reversed
	// claim date (plus the claim, for uniqueness); the value holds whether
	// the path is active and the target blobref.
	keyPathForward = &keyType{
		"path",
		[]part{
			{"signer", typeKeyId},
			{"base", typeBlobRef},
			{"suffix", typeStr},
			{"claimDate", typeReverseTime},
			{"claim", typeBlobRef}, // for key uniqueness
		},
		[]part{
			{"active", typeStr}, // 'Y', or 'N' for deleted
			{"target", typeBlobRef},
		},
	}

	// keyWholeToFileRef rows are keyed by a "whole" blobref and a "schema"
	// blobref; the value is a single part named "1".
	// NOTE(review): presumably "whole" is the blobref of a file's entire
	// contents and "schema" is a file schema blob describing it — confirm
	// against the callers.
	keyWholeToFileRef = &keyType{
		"wholetofile",
		[]part{
			{"whole", typeBlobRef},
			{"schema", typeBlobRef}, // for key uniqueness
		},
		[]part{
			{"1", typeStr},
		},
	}

	// keyFileInfo rows are keyed by a file blobref; the value holds the
	// file's size, name, and MIME type.
	keyFileInfo = &keyType{
		"fileinfo",
		[]part{
			{"file", typeBlobRef},
		},
		[]part{
			{"size", typeIntStr},
			{"filename", typeStr},
			{"mimetype", typeStr},
		},
	}

	// keyFileTimes rows are keyed by a file blobref; the value holds the
	// file's creation/modification times as a single string.
	keyFileTimes = &keyType{
		"filetimes",
		[]part{
			{"file", typeBlobRef},
		},
		[]part{
			// 0, 1, or 2 comma-separated types.Time3339
			// strings for creation/mod times. Oldest,
			// then newest. See FileInfo docs.
			{"time3339s", typeStr},
		},
	}

	// keySignerAttrValue rows are keyed by signer, attribute name, attribute
	// value, a reversed claim date, and the claim blobref; the value is a
	// permanode blobref.
	keySignerAttrValue = &keyType{
		"signerattrvalue",
		[]part{
			{"signer", typeKeyId},
			{"attr", typeStr},
			{"value", typeStr},
			{"claimdate", typeReverseTime},
			{"claimref", typeBlobRef},
		},
		[]part{
			{"permanode", typeBlobRef},
		},
	}

	// keyDeleted indexes a claim that deletes an entity. It ties the deleted
	// entity to the date it was deleted, and to the deleter claim.
	keyDeleted = &keyType{
		"deleted",
		[]part{
			{"deleted", typeBlobRef}, // the deleted entity (a permanode or another claim)
			{"claimdate", typeReverseTime},
			{"deleter", typeBlobRef}, // the deleter claim blobref
		},
		nil,
	}

	// keyEdgeBackward: given a blobref (permanode or static file or directory),
	// provide a mapping to potential parents (they may no longer be parents, in
	// the case of permanodes).
	// In the case of permanodes, camliMember or camliContent constitutes a forward
	// edge.  In the case of static directories, the forward path is dir->static set->file,
	// and that's what's indexed here, inverted.
	keyEdgeBackward = &keyType{
		"edgeback",
		[]part{
			{"child", typeBlobRef},  // the edge target; thing we want to find parent(s) of
			{"parent", typeBlobRef}, // the parent / edge source (e.g. permanode blobref)
			// the blobref is the blob establishing the relationship
			// (for a permanode: the claim; for static: often same as parent)
			{"blobref", typeBlobRef},
		},
		[]part{
			{"parenttype", typeStr}, // either "permanode" or the camliType ("file", "static-set", etc)
			{"name", typeStr},       // the name, if static.
		},
	}

	// keyImageSize records an image's dimensions.
	// Width and height after any EXIF rotation.
	keyImageSize = &keyType{
		"imagesize",
		[]part{
			{"fileref", typeBlobRef}, // blobref of "file" schema blob
		},
		[]part{
			{"width", typeStr},
			{"height", typeStr},
		},
	}

	// keyStaticDirChild records a child of a directory. The value is a
	// single part named "1".
	keyStaticDirChild = &keyType{
		"dirchild",
		[]part{
			{"dirref", typeBlobRef}, // blobref of "directory" schema blob
			{"child", typeStr},      // blobref of the child
		},
		[]part{
			{"1", typeStr},
		},
	}

	// keyMediaTag records media attributes (e.g. ID3 tags). Uses generic
	// terms like "artist", "title", "album", etc.
	keyMediaTag = &keyType{
		"mediatag",
		[]part{
			{"wholeRef", typeBlobRef}, // wholeRef for song
			{"tag", typeStr},
		},
		[]part{
			{"value", typeStr},
		},
	}

	// keyEXIFTag records EXIF tags, one row per tag.
	keyEXIFTag = &keyType{
		"exiftag",
		[]part{
			{"wholeRef", typeBlobRef}, // of entire file, not fileref
			{"tag", typeStr},          // uint16 tag number as hex: xxxx
		},
		[]part{
			{"type", typeStr},    // "int", "rat", "float", "string"
			{"n", typeIntStr},    // n components of type
			{"vals", typeRawStr}, // pipe-separated; rats are n/d. strings are URL-escaped.
		},
	}

	// keyEXIFGPS records a file's latitude and longitude.
	// Redundant version of keyEXIFTag. TODO: maybe get rid of this.
	// Easier to process as one row instead of 4, though.
	keyEXIFGPS = &keyType{
		"exifgps",
		[]part{
			{"wholeRef", typeBlobRef}, // of entire file, not fileref
		},
		[]part{
			{"lat", typeStr},
			{"long", typeStr},
		},
	}
)
   371  
   372  func containsUnsafeRawStrByte(s string) bool {
   373  	for _, r := range s {
   374  		if r >= 'z' || r < ' ' {
   375  			// pipe ('|) and non-ASCII are above 'z'.
   376  			return true
   377  		}
   378  		if r == '%' || r == '+' {
   379  			// Could be interpretted as URL-encoded
   380  			return true
   381  		}
   382  	}
   383  	return false
   384  }