github.com/olivere/camlistore@v0.0.0-20140121221811-1b7ac2da0199/pkg/importer/importer.go (about) 1 /* 2 Copyright 2013 Google Inc. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 // Package importer imports content from third-party websites. 18 // 19 // TODO(bradfitz): Finish this. Barely started. 20 package importer 21 22 import ( 23 "errors" 24 "fmt" 25 "log" 26 "net/http" 27 "sync" 28 29 "camlistore.org/pkg/blob" 30 "camlistore.org/pkg/blobserver" 31 "camlistore.org/pkg/httputil" 32 "camlistore.org/pkg/jsonconfig" 33 "camlistore.org/pkg/jsonsign/signhandler" 34 "camlistore.org/pkg/schema" 35 "camlistore.org/pkg/search" 36 "camlistore.org/pkg/server" 37 "camlistore.org/pkg/syncutil" 38 ) 39 40 // A Host is the environment hosting an importer. 41 type Host struct { 42 BaseURL string 43 44 imp Importer 45 target blobserver.StatReceiver 46 search *search.Handler 47 signer *schema.Signer 48 49 // client optionally specifies how to fetch external network 50 // resources. If nil, http.DefaultClient is used. 51 client *http.Client 52 transport http.RoundTripper 53 54 mu sync.Mutex 55 running bool 56 stopreq chan struct{} // closed to signal importer to stop and return an error 57 lastProgress *ProgressMessage 58 lastRunErr error 59 } 60 61 func (h *Host) String() string { 62 return fmt.Sprintf("%T(%s)", h, h.imp) 63 } 64 65 func (h *Host) Target() blobserver.StatReceiver { 66 return h.target 67 } 68 69 func (h *Host) Search() *search.Handler { 70 return h.search 71 } 72 73 func (h *Host) ServeHTTP(w http.ResponseWriter, r *http.Request) { 74 if httputil.PathSuffix(r) == "" { 75 switch r.FormValue("mode") { 76 case "": 77 case "start": 78 h.start() 79 case "stop": 80 h.stop() 81 default: 82 fmt.Fprintf(w, "Unknown mode") 83 } 84 h.mu.Lock() 85 defer h.mu.Unlock() 86 fmt.Fprintf(w, "I am an importer of type %T; running=%v; last progress=%#v", 87 h.imp, h.running, h.lastProgress) 88 } else { 89 // TODO(aa): Remove this temporary hack once the UI has a way to configure importers. 90 h.imp.ServeHTTP(w, r) 91 } 92 } 93 94 func (h *Host) start() { 95 h.mu.Lock() 96 defer h.mu.Unlock() 97 if h.running { 98 return 99 } 100 h.running = true 101 stopCh := make(chan struct{}) 102 h.stopreq = stopCh 103 go func() { 104 log.Printf("Starting importer %s", h) 105 err := h.imp.Run(stopCh) 106 if err != nil { 107 log.Printf("Importer %s error: %v", h, err) 108 } else { 109 log.Printf("Importer %s finished.", h) 110 } 111 h.mu.Lock() 112 defer h.mu.Unlock() 113 h.running = false 114 h.lastRunErr = err 115 }() 116 } 117 118 func (h *Host) stop() { 119 h.mu.Lock() 120 defer h.mu.Unlock() 121 if !h.running { 122 return 123 } 124 h.running = false 125 close(h.stopreq) 126 } 127 128 // HTTPClient returns the HTTP client to use. 129 func (h *Host) HTTPClient() *http.Client { 130 if h.client == nil { 131 return http.DefaultClient 132 } 133 return h.client 134 } 135 136 // HTTPTransport returns the HTTP transport to use. 137 func (h *Host) HTTPTransport() http.RoundTripper { 138 if h.transport == nil { 139 return http.DefaultTransport 140 } 141 return h.transport 142 } 143 144 type ProgressMessage struct { 145 ItemsDone, ItemsTotal int 146 BytesDone, BytesTotal int64 147 } 148 149 func (h *Host) upload(bb *schema.Builder) (br blob.Ref, err error) { 150 signed, err := bb.Sign(h.signer) 151 if err != nil { 152 return 153 } 154 sb, err := blobserver.ReceiveString(h.target, signed) 155 if err != nil { 156 return 157 } 158 return sb.Ref, nil 159 } 160 161 // NewObject creates a new permanode and returns its Object wrapper. 162 func (h *Host) NewObject() (*Object, error) { 163 pn, err := h.upload(schema.NewUnsignedPermanode()) 164 if err != nil { 165 return nil, err 166 } 167 // No need to do a describe query against it: we know it's 168 // empty (has no claims against it yet). 169 return &Object{h: h, pn: pn}, nil 170 } 171 172 // An Object is wrapper around a permanode that the importer uses 173 // to synchronize. 174 type Object struct { 175 h *Host 176 pn blob.Ref // permanode ref 177 178 mu sync.RWMutex 179 attr map[string][]string 180 } 181 182 // PermanodeRef returns the permanode that this object wraps. 183 func (o *Object) PermanodeRef() blob.Ref { 184 return o.pn 185 } 186 187 // Attr returns the object's attribute value for the provided attr, 188 // or the empty string if unset. To distinguish between unset, 189 // an empty string, or multiple attribute values, use Attrs. 190 func (o *Object) Attr(attr string) string { 191 o.mu.RLock() 192 defer o.mu.RUnlock() 193 if v := o.attr[attr]; len(v) > 0 { 194 return v[0] 195 } 196 return "" 197 } 198 199 // Attrs returns the attribute values for the provided attr. 200 func (o *Object) Attrs(attr string) []string { 201 o.mu.RLock() 202 defer o.mu.RUnlock() 203 return o.attr[attr] 204 } 205 206 // SetAttr sets the attribute key to value. 207 func (o *Object) SetAttr(key, value string) error { 208 if o.Attr(key) == value { 209 return nil 210 } 211 _, err := o.h.upload(schema.NewSetAttributeClaim(o.pn, key, value)) 212 if err != nil { 213 return err 214 } 215 o.mu.Lock() 216 defer o.mu.Unlock() 217 if o.attr == nil { 218 o.attr = make(map[string][]string) 219 } 220 o.attr[key] = []string{value} 221 return nil 222 } 223 224 // SetAttrs sets multiple attributes. The provided keyval should be an even number of alternating key/value pairs to set. 225 func (o *Object) SetAttrs(keyval ...string) error { 226 if len(keyval)%2 == 1 { 227 panic("importer.SetAttrs: odd argument count") 228 } 229 230 g := syncutil.Group{} 231 for i := 0; i < len(keyval); i += 2 { 232 key, val := keyval[i], keyval[i+1] 233 if val != o.Attr(key) { 234 g.Go(func() error { 235 return o.SetAttr(key, val) 236 }) 237 } 238 } 239 return g.Err() 240 } 241 242 // ChildPathObject returns (creating if necessary) the child object 243 // from the permanode o, given by the "camliPath:xxxx" attribute, 244 // where xxx is the provided path. 245 func (o *Object) ChildPathObject(path string) (*Object, error) { 246 attrName := "camliPath:" + path 247 if v := o.Attr(attrName); v != "" { 248 br, ok := blob.Parse(v) 249 if ok { 250 return o.h.ObjectFromRef(br) 251 } 252 } 253 254 childBlobRef, err := o.h.upload(schema.NewUnsignedPermanode()) 255 if err != nil { 256 return nil, err 257 } 258 259 if err := o.SetAttr(attrName, childBlobRef.String()); err != nil { 260 return nil, err 261 } 262 263 return &Object{ 264 h: o.h, 265 pn: childBlobRef, 266 }, nil 267 } 268 269 // RootObject returns the root permanode for this importer account. 270 func (h *Host) RootObject() (*Object, error) { 271 res, err := h.search.GetPermanodesWithAttr(&search.WithAttrRequest{ 272 N: 2, // only expect 1 273 Attr: "camliImportRoot", 274 Value: h.imp.Prefix(), 275 }) 276 if err != nil { 277 log.Printf("RootObject searching GetPermanodesWithAttr: %v", err) 278 return nil, err 279 } 280 if len(res.WithAttr) == 0 { 281 obj, err := h.NewObject() 282 if err != nil { 283 return nil, err 284 } 285 log.Printf("No root object found. Created %v", obj.pn) 286 if err := obj.SetAttr("camliImportRoot", h.imp.Prefix()); err != nil { 287 return nil, err 288 } 289 return obj, nil 290 } 291 if len(res.WithAttr) > 1 { 292 return nil, fmt.Errorf("Found %d import roots for %q; want 1", len(res.WithAttr), h.imp.Prefix()) 293 } 294 pn := res.WithAttr[0].Permanode 295 return h.ObjectFromRef(pn) 296 } 297 298 // ObjectFromRef returns the object given by the named permanode 299 func (h *Host) ObjectFromRef(permanodeRef blob.Ref) (*Object, error) { 300 res, err := h.search.Describe(&search.DescribeRequest{ 301 BlobRef: permanodeRef, 302 Depth: 1, 303 }) 304 if err != nil { 305 return nil, err 306 } 307 db, ok := res.Meta[permanodeRef.String()] 308 if !ok { 309 return nil, fmt.Errorf("permanode %v wasn't in Describe response", permanodeRef) 310 } 311 if db.Permanode == nil { 312 return nil, fmt.Errorf("permanode %v had no DescribedPermanode in Describe response", permanodeRef) 313 } 314 return &Object{ 315 h: h, 316 pn: permanodeRef, 317 attr: map[string][]string(db.Permanode.Attr), 318 }, nil 319 } 320 321 // ErrInterrupted should be returned by importers 322 // when an Interrupt fires. 323 var ErrInterrupted = errors.New("import interrupted by request") 324 325 // An Interrupt is passed to importers for them to monitor 326 // requests to stop importing. The channel is closed as 327 // a signal to stop. 328 type Interrupt <-chan struct{} 329 330 // ShouldStop returns whether the interrupt has fired. 331 // If so, importers should return ErrInterrupted. 332 func (i Interrupt) ShouldStop() bool { 333 select { 334 case <-i: 335 return true 336 default: 337 return false 338 } 339 } 340 341 // An Importer imports from a third-party site. 342 type Importer interface { 343 // Run runs a full or increment import. 344 Run(Interrupt) error 345 346 // Prefix returns the unique prefix for this importer. 347 // It should be of the form "serviceType:username". 348 // Further colons are added to form the names of planned 349 // permanodes. 350 Prefix() string 351 352 // CanHandleURL returns whether a URL (such as one a user is 353 // viewing in their browser and dragged onto Camlistore) is a 354 // form recognized by this importer. If so, its full metadata 355 // and full data (e.g. unscaled image) can be fetched, rather 356 // than just fetching the HTML of the URL. 357 // 358 // TODO: implement and use this. For now importers can return 359 // stub these and return false/errors. They're unused. 360 CanHandleURL(url string) bool 361 ImportURL(url string) error 362 363 ServeHTTP(w http.ResponseWriter, r *http.Request) 364 } 365 366 // Constructor is the function type that importers must register at init time. 367 type Constructor func(jsonconfig.Obj, *Host) (Importer, error) 368 369 var ( 370 mu sync.Mutex 371 ctors = make(map[string]Constructor) 372 ) 373 374 func Register(name string, fn Constructor) { 375 mu.Lock() 376 defer mu.Unlock() 377 if _, dup := ctors[name]; dup { 378 panic("Dup registration of importer " + name) 379 } 380 ctors[name] = fn 381 } 382 383 func Create(name string, hl blobserver.Loader, baseURL string, cfg jsonconfig.Obj) (*Host, error) { 384 mu.Lock() 385 defer mu.Unlock() 386 fn := ctors[name] 387 if fn == nil { 388 return nil, fmt.Errorf("Unknown importer type %q", name) 389 } 390 h := &Host{ 391 BaseURL: baseURL, 392 } 393 imp, err := fn(cfg, h) 394 if err != nil { 395 return nil, err 396 } 397 h.imp = imp 398 return h, nil 399 } 400 401 func (h *Host) InitHandler(hl blobserver.FindHandlerByTyper) error { 402 _, handler, err := hl.FindHandlerByType("root") 403 if err != nil || handler == nil { 404 return errors.New("importer requires a 'root' handler") 405 } 406 rh := handler.(*server.RootHandler) 407 searchHandler, ok := rh.SearchHandler() 408 if !ok { 409 return errors.New("importer requires a 'root' handler with 'searchRoot' defined.") 410 } 411 h.search = searchHandler 412 if rh.Storage == nil { 413 return errors.New("importer requires a 'root' handler with 'blobRoot' defined.") 414 } 415 h.target = rh.Storage 416 417 _, handler, _ = hl.FindHandlerByType("jsonsign") 418 if sigh, ok := handler.(*signhandler.Handler); ok { 419 h.signer = sigh.Signer() 420 } 421 if h.signer == nil { 422 return errors.New("importer requires a 'jsonsign' handler") 423 } 424 425 return nil 426 }