github.com/slspeek/camlistore_namedsearch@v0.0.0-20140519202248-ed6f70f7721a/pkg/importer/picasa/picasa.go (about) 1 /* 2 Copyright 2014 The Camlistore Authors 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 // Package picasa is an importer for Picasa Web. 18 package picasa 19 20 import ( 21 "fmt" 22 "io" 23 "log" 24 "net/http" 25 "strings" 26 "sync" 27 28 "camlistore.org/pkg/blob" 29 "camlistore.org/pkg/context" 30 "camlistore.org/pkg/importer" 31 "camlistore.org/pkg/jsonconfig" 32 "camlistore.org/pkg/schema" 33 "camlistore.org/pkg/search" 34 "camlistore.org/pkg/syncutil" 35 36 "camlistore.org/third_party/code.google.com/p/goauth2/oauth" 37 "camlistore.org/third_party/github.com/tgulacsi/picago" 38 ) 39 40 var parallelWorkers = 4 41 var parallelAlbumRoutines = 4 42 43 func init() { 44 importer.Register("picasa", newFromConfig) 45 } 46 47 func newFromConfig(cfg jsonconfig.Obj, host *importer.Host) (importer.Importer, error) { 48 key := cfg.RequiredString("apiKey") 49 err := cfg.Validate() 50 if err != nil { 51 return nil, err 52 } 53 chunks := strings.SplitN(key, ":", 2) 54 if len(chunks) < 2 { 55 return nil, fmt.Errorf("Picasa apiKey must be in the format cleintID:clientSecret (got %s)", key) 56 } 57 im := &imp{ 58 //clientID: chunks[0], 59 //clientSecret: chunks[1], 60 host: host, 61 } 62 if im.transport, err = picago.NewTransport(chunks[0], chunks[1], im); err != nil { 63 return nil, err 64 } 65 return im, nil 66 } 67 68 type imp struct { 69 //clientID string 70 //clientSecret string 71 sync.Mutex 72 transport *oauth.Transport 73 host *importer.Host 74 } 75 76 func (im *imp) CanHandleURL(url string) bool { return false } 77 func (im *imp) ImportURL(url string) error { panic("unused") } 78 79 func (im *imp) Prefix() string { 80 cid := "" 81 if im.transport != nil { 82 cid = im.transport.Config.ClientId 83 } 84 return fmt.Sprintf("picasa:%s", cid) 85 } 86 87 func (im *imp) Run(ctx *context.Context) (err error) { 88 log.Printf("Running picasa importer.") 89 defer func() { 90 log.Printf("picasa importer returned: %v", err) 91 }() 92 93 im.Lock() 94 client := &http.Client{Transport: im.transport} 95 im.Unlock() 96 97 root, err := im.getRootNode() 98 if err != nil { 99 return err 100 } 101 itemch := make(chan imageFile) 102 errch := make(chan error, parallelWorkers) 103 tbd := make(chan imageFile) 104 105 // For caching album name -> imported Object, to skip lookup by path 106 // (Attr) as much as possible. 107 var albumCacheMu sync.Mutex 108 albumCache := make(map[string]*importer.Object) 109 110 getParentObj := func(name, title string) *importer.Object { 111 albumCacheMu.Lock() 112 defer albumCacheMu.Unlock() 113 parent, ok := albumCache[name] 114 if ok { 115 return parent 116 } 117 118 parent, err = im.getChildByPath(name) 119 if err != nil { 120 log.Printf("getParentObj(%s): %v", name, err) 121 } 122 if parent == nil { 123 parent, err = root.ChildPathObject(name) 124 if err != nil { 125 log.Printf("error creating ChildPathObject(%s): %v", name, err) 126 errch <- err 127 parent = root 128 } 129 } 130 albumCache[name] = parent 131 if err = parent.SetAttrs("title", title, "tag", name); err != nil { 132 errch <- err 133 } 134 return parent 135 } 136 137 var workers sync.WaitGroup 138 worker := func() { 139 for img := range tbd { 140 parent := getParentObj(img.albumName, img.albumTitle) 141 142 fn := img.albumName + "/" + img.fileName 143 log.Printf("importing %s", fn) 144 fileRef, err := schema.WriteFileFromReader(im.host.Target(), fn, img.r) 145 img.r.Close() 146 if err != nil { 147 // FIXME(tgulacsi): cannot download movies 148 log.Printf("error downloading %s: %v", img.fileName, err) 149 continue 150 } 151 // parent will have an attr camliPath:img.fileName set to this permanode 152 obj, err := parent.ChildPathObject(img.fileName) 153 if err != nil { 154 errch <- err 155 } 156 157 if err = obj.SetAttrs( 158 "camliContent", fileRef.String(), 159 "album", img.albumTitle, 160 "tag", img.albumName, 161 ); err != nil { 162 errch <- err 163 } 164 } 165 workers.Done() 166 } 167 168 workers.Add(parallelWorkers) 169 for i := 0; i < parallelWorkers; i++ { 170 go worker() 171 } 172 173 // decide whether we should import this image 174 filter := func(img imageFile) (bool, error) { 175 intrErr := func(e error) error { 176 if e != nil { 177 return e 178 } 179 if ctx.IsCanceled() { 180 return context.ErrCanceled 181 } 182 return nil 183 } 184 parent := getParentObj(img.albumName, img.albumTitle) 185 if parent != nil { 186 pn := parent.Attr("camliPath:" + img.fileName) 187 if pn != "" { 188 ref, ok := blob.Parse(pn) 189 if !ok { 190 return true, fmt.Errorf("cannot parse %s as blobRef", pn) 191 } 192 obj, err := im.host.ObjectFromRef(ref) 193 if err != nil { 194 return false, err 195 } 196 if obj != nil { 197 log.Printf("%s/%s already imported as %s.", 198 img.albumName, img.fileName, obj.PermanodeRef()) 199 return false, intrErr(nil) 200 } 201 } 202 } 203 return true, intrErr(nil) 204 } 205 206 go iterItems(itemch, errch, filter, client, "default") 207 for { 208 select { 209 case err = <-errch: 210 close(tbd) 211 if err == context.ErrCanceled { 212 log.Printf("Picasa importer has been interrupted.") 213 } else { 214 log.Printf("Picasa importer error: %v", err) 215 workers.Wait() 216 } 217 return err 218 case <-ctx.Done(): 219 log.Printf("Picasa importer has been interrupted.") 220 close(tbd) 221 return context.ErrCanceled 222 case img := <-itemch: 223 tbd <- img 224 } 225 } 226 close(tbd) 227 workers.Wait() 228 return nil 229 } 230 231 func (im *imp) getRootNode() (*importer.Object, error) { 232 root, err := im.host.RootObject() 233 if err != nil { 234 return nil, err 235 } 236 237 if root.Attr("title") == "" { 238 //FIXME(tgulacsi): we need the username, from somewhere 239 title := fmt.Sprintf("Picasa (%s)", "default") 240 if err := root.SetAttr("title", title); err != nil { 241 return nil, err 242 } 243 } 244 return root, nil 245 } 246 247 // getChildByPath searches for attribute camliPath:path and returns the object 248 // to which this permanode points. 249 // This is the reverse of imp.ChildPathObject. 250 func (im *imp) getChildByPath(path string) (obj *importer.Object, err error) { 251 key := "camliPath:" + path 252 defer func() { 253 log.Printf("search for %s resulted in %v/%v", path, obj, err) 254 }() 255 res, e := im.host.Search().GetPermanodesWithAttr(&search.WithAttrRequest{ 256 N: 2, // only expect 1 257 Attr: key, 258 }) 259 log.Printf("searching for %s: %v, %v", key, res, e) 260 if e != nil { 261 err = e 262 log.Printf("getChildByPath searching GetPermanodesWithAttr: %v", err) 263 return nil, err 264 } 265 if len(res.WithAttr) == 0 { 266 return nil, nil 267 } 268 if len(res.WithAttr) > 1 { 269 err = fmt.Errorf("Found %d import roots for %q; want 1", len(res.WithAttr), path) 270 return nil, err 271 } 272 pn := res.WithAttr[0].Permanode 273 parent, e := im.host.ObjectFromRef(pn) 274 if e != nil { 275 err = e 276 return nil, err 277 } 278 br := parent.Attr(key) 279 pn, ok := blob.Parse(br) 280 if !ok { 281 err = fmt.Errorf("cannot parse %s (value of %s.%s) as blobRef", 282 br, parent, key) 283 return nil, err 284 } 285 obj, err = im.host.ObjectFromRef(pn) 286 return obj, err 287 } 288 289 type imageFile struct { 290 albumTitle, albumName string 291 fileName string 292 ID string 293 r io.ReadCloser 294 } 295 296 type filterFunc func(imageFile) (bool, error) 297 298 func iterItems(itemch chan<- imageFile, errch chan<- error, 299 filter filterFunc, client *http.Client, username string) { 300 301 defer close(itemch) 302 303 albums, err := picago.GetAlbums(client, username) 304 if err != nil { 305 errch <- err 306 return 307 } 308 gate := syncutil.NewGate(parallelAlbumRoutines) 309 for _, album := range albums { 310 photos, err := picago.GetPhotos(client, username, album.ID) 311 if err != nil { 312 select { 313 case errch <- err: 314 default: 315 return 316 } 317 continue 318 } 319 gate.Start() 320 go func(albumName, albumTitle string) { 321 defer gate.Done() 322 for _, photo := range photos { 323 img := imageFile{ 324 albumTitle: albumTitle, 325 albumName: albumName, 326 fileName: photo.Filename(), 327 ID: photo.ID, 328 } 329 ok, err := filter(img) 330 if err != nil { 331 errch <- err 332 return 333 } 334 if !ok { 335 continue 336 } 337 338 img.r, err = picago.DownloadPhoto(client, photo.URL) 339 if err != nil { 340 select { 341 case errch <- fmt.Errorf("Get(%s): %v", photo.URL, err): 342 default: 343 return 344 } 345 continue 346 } 347 itemch <- img 348 } 349 }(album.Name, album.Title) 350 } 351 }