github.com/slspeek/camlistore_namedsearch@v0.0.0-20140519202248-ed6f70f7721a/pkg/importer/picasa/picasa.go (about)

     1  /*
     2  Copyright 2014 The Camlistore Authors
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8       http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  // Package picasa is an importer for Picasa Web.
    18  package picasa
    19  
    20  import (
    21  	"fmt"
    22  	"io"
    23  	"log"
    24  	"net/http"
    25  	"strings"
    26  	"sync"
    27  
    28  	"camlistore.org/pkg/blob"
    29  	"camlistore.org/pkg/context"
    30  	"camlistore.org/pkg/importer"
    31  	"camlistore.org/pkg/jsonconfig"
    32  	"camlistore.org/pkg/schema"
    33  	"camlistore.org/pkg/search"
    34  	"camlistore.org/pkg/syncutil"
    35  
    36  	"camlistore.org/third_party/code.google.com/p/goauth2/oauth"
    37  	"camlistore.org/third_party/github.com/tgulacsi/picago"
    38  )
    39  
    40  var parallelWorkers = 4
    41  var parallelAlbumRoutines = 4
    42  
// init registers this importer with the importer package under the name
// "picasa", with newFromConfig as its constructor.
func init() {
	importer.Register("picasa", newFromConfig)
}
    46  
    47  func newFromConfig(cfg jsonconfig.Obj, host *importer.Host) (importer.Importer, error) {
    48  	key := cfg.RequiredString("apiKey")
    49  	err := cfg.Validate()
    50  	if err != nil {
    51  		return nil, err
    52  	}
    53  	chunks := strings.SplitN(key, ":", 2)
    54  	if len(chunks) < 2 {
    55  		return nil, fmt.Errorf("Picasa apiKey must be in the format cleintID:clientSecret (got %s)", key)
    56  	}
    57  	im := &imp{
    58  		//clientID:     chunks[0],
    59  		//clientSecret: chunks[1],
    60  		host: host,
    61  	}
    62  	if im.transport, err = picago.NewTransport(chunks[0], chunks[1], im); err != nil {
    63  		return nil, err
    64  	}
    65  	return im, nil
    66  }
    67  
// imp is the Picasa importer created by newFromConfig.
type imp struct {
	//clientID     string
	//clientSecret string

	// The embedded mutex is held by Run while it reads transport.
	sync.Mutex
	// transport is the OAuth transport created by picago.NewTransport; Run
	// uses it as the Transport of its HTTP client.
	transport *oauth.Transport
	// host gives access to the root object, the search handler and the
	// blob target used during an import.
	host *importer.Host
}
    75  
    76  func (im *imp) CanHandleURL(url string) bool { return false }
    77  func (im *imp) ImportURL(url string) error   { panic("unused") }
    78  
    79  func (im *imp) Prefix() string {
    80  	cid := ""
    81  	if im.transport != nil {
    82  		cid = im.transport.Config.ClientId
    83  	}
    84  	return fmt.Sprintf("picasa:%s", cid)
    85  }
    86  
    87  func (im *imp) Run(ctx *context.Context) (err error) {
    88  	log.Printf("Running picasa importer.")
    89  	defer func() {
    90  		log.Printf("picasa importer returned: %v", err)
    91  	}()
    92  
    93  	im.Lock()
    94  	client := &http.Client{Transport: im.transport}
    95  	im.Unlock()
    96  
    97  	root, err := im.getRootNode()
    98  	if err != nil {
    99  		return err
   100  	}
   101  	itemch := make(chan imageFile)
   102  	errch := make(chan error, parallelWorkers)
   103  	tbd := make(chan imageFile)
   104  
   105  	// For caching album name -> imported Object, to skip lookup by path
   106  	// (Attr) as much as possible.
   107  	var albumCacheMu sync.Mutex
   108  	albumCache := make(map[string]*importer.Object)
   109  
   110  	getParentObj := func(name, title string) *importer.Object {
   111  		albumCacheMu.Lock()
   112  		defer albumCacheMu.Unlock()
   113  		parent, ok := albumCache[name]
   114  		if ok {
   115  			return parent
   116  		}
   117  
   118  		parent, err = im.getChildByPath(name)
   119  		if err != nil {
   120  			log.Printf("getParentObj(%s): %v", name, err)
   121  		}
   122  		if parent == nil {
   123  			parent, err = root.ChildPathObject(name)
   124  			if err != nil {
   125  				log.Printf("error creating ChildPathObject(%s): %v", name, err)
   126  				errch <- err
   127  				parent = root
   128  			}
   129  		}
   130  		albumCache[name] = parent
   131  		if err = parent.SetAttrs("title", title, "tag", name); err != nil {
   132  			errch <- err
   133  		}
   134  		return parent
   135  	}
   136  
   137  	var workers sync.WaitGroup
   138  	worker := func() {
   139  		for img := range tbd {
   140  			parent := getParentObj(img.albumName, img.albumTitle)
   141  
   142  			fn := img.albumName + "/" + img.fileName
   143  			log.Printf("importing %s", fn)
   144  			fileRef, err := schema.WriteFileFromReader(im.host.Target(), fn, img.r)
   145  			img.r.Close()
   146  			if err != nil {
   147  				// FIXME(tgulacsi): cannot download movies
   148  				log.Printf("error downloading %s: %v", img.fileName, err)
   149  				continue
   150  			}
   151  			// parent will have an attr camliPath:img.fileName set to this permanode
   152  			obj, err := parent.ChildPathObject(img.fileName)
   153  			if err != nil {
   154  				errch <- err
   155  			}
   156  
   157  			if err = obj.SetAttrs(
   158  				"camliContent", fileRef.String(),
   159  				"album", img.albumTitle,
   160  				"tag", img.albumName,
   161  			); err != nil {
   162  				errch <- err
   163  			}
   164  		}
   165  		workers.Done()
   166  	}
   167  
   168  	workers.Add(parallelWorkers)
   169  	for i := 0; i < parallelWorkers; i++ {
   170  		go worker()
   171  	}
   172  
   173  	// decide whether we should import this image
   174  	filter := func(img imageFile) (bool, error) {
   175  		intrErr := func(e error) error {
   176  			if e != nil {
   177  				return e
   178  			}
   179  			if ctx.IsCanceled() {
   180  				return context.ErrCanceled
   181  			}
   182  			return nil
   183  		}
   184  		parent := getParentObj(img.albumName, img.albumTitle)
   185  		if parent != nil {
   186  			pn := parent.Attr("camliPath:" + img.fileName)
   187  			if pn != "" {
   188  				ref, ok := blob.Parse(pn)
   189  				if !ok {
   190  					return true, fmt.Errorf("cannot parse %s as blobRef", pn)
   191  				}
   192  				obj, err := im.host.ObjectFromRef(ref)
   193  				if err != nil {
   194  					return false, err
   195  				}
   196  				if obj != nil {
   197  					log.Printf("%s/%s already imported as %s.",
   198  						img.albumName, img.fileName, obj.PermanodeRef())
   199  					return false, intrErr(nil)
   200  				}
   201  			}
   202  		}
   203  		return true, intrErr(nil)
   204  	}
   205  
   206  	go iterItems(itemch, errch, filter, client, "default")
   207  	for {
   208  		select {
   209  		case err = <-errch:
   210  			close(tbd)
   211  			if err == context.ErrCanceled {
   212  				log.Printf("Picasa importer has been interrupted.")
   213  			} else {
   214  				log.Printf("Picasa importer error: %v", err)
   215  				workers.Wait()
   216  			}
   217  			return err
   218  		case <-ctx.Done():
   219  			log.Printf("Picasa importer has been interrupted.")
   220  			close(tbd)
   221  			return context.ErrCanceled
   222  		case img := <-itemch:
   223  			tbd <- img
   224  		}
   225  	}
   226  	close(tbd)
   227  	workers.Wait()
   228  	return nil
   229  }
   230  
   231  func (im *imp) getRootNode() (*importer.Object, error) {
   232  	root, err := im.host.RootObject()
   233  	if err != nil {
   234  		return nil, err
   235  	}
   236  
   237  	if root.Attr("title") == "" {
   238  		//FIXME(tgulacsi): we need the username, from somewhere
   239  		title := fmt.Sprintf("Picasa (%s)", "default")
   240  		if err := root.SetAttr("title", title); err != nil {
   241  			return nil, err
   242  		}
   243  	}
   244  	return root, nil
   245  }
   246  
   247  // getChildByPath searches for attribute camliPath:path and returns the object
   248  // to which this permanode points.
   249  // This is the reverse of imp.ChildPathObject.
   250  func (im *imp) getChildByPath(path string) (obj *importer.Object, err error) {
   251  	key := "camliPath:" + path
   252  	defer func() {
   253  		log.Printf("search for %s resulted in %v/%v", path, obj, err)
   254  	}()
   255  	res, e := im.host.Search().GetPermanodesWithAttr(&search.WithAttrRequest{
   256  		N:    2, // only expect 1
   257  		Attr: key,
   258  	})
   259  	log.Printf("searching for %s: %v, %v", key, res, e)
   260  	if e != nil {
   261  		err = e
   262  		log.Printf("getChildByPath searching GetPermanodesWithAttr: %v", err)
   263  		return nil, err
   264  	}
   265  	if len(res.WithAttr) == 0 {
   266  		return nil, nil
   267  	}
   268  	if len(res.WithAttr) > 1 {
   269  		err = fmt.Errorf("Found %d import roots for %q; want 1", len(res.WithAttr), path)
   270  		return nil, err
   271  	}
   272  	pn := res.WithAttr[0].Permanode
   273  	parent, e := im.host.ObjectFromRef(pn)
   274  	if e != nil {
   275  		err = e
   276  		return nil, err
   277  	}
   278  	br := parent.Attr(key)
   279  	pn, ok := blob.Parse(br)
   280  	if !ok {
   281  		err = fmt.Errorf("cannot parse %s (value of %s.%s) as blobRef",
   282  			br, parent, key)
   283  		return nil, err
   284  	}
   285  	obj, err = im.host.ObjectFromRef(pn)
   286  	return obj, err
   287  }
   288  
// imageFile describes one photo travelling from iterItems to the workers
// started by Run.
type imageFile struct {
	// albumTitle and albumName are the Title and Name of the album the
	// photo belongs to.
	albumTitle, albumName string
	// fileName is the value of the photo's Filename method.
	fileName string
	// ID is the photo's ID.
	ID string
	// r is the reader returned by picago.DownloadPhoto; it is unset while
	// the image is passed to the filter and is closed by the worker that
	// stores the image.
	r io.ReadCloser
}
   295  
// filterFunc decides whether an image should be imported. iterItems
// downloads the image only when it returns true; a non-nil error is
// forwarded to the error channel and stops processing of the image's album.
type filterFunc func(imageFile) (bool, error)
   297  
   298  func iterItems(itemch chan<- imageFile, errch chan<- error,
   299  	filter filterFunc, client *http.Client, username string) {
   300  
   301  	defer close(itemch)
   302  
   303  	albums, err := picago.GetAlbums(client, username)
   304  	if err != nil {
   305  		errch <- err
   306  		return
   307  	}
   308  	gate := syncutil.NewGate(parallelAlbumRoutines)
   309  	for _, album := range albums {
   310  		photos, err := picago.GetPhotos(client, username, album.ID)
   311  		if err != nil {
   312  			select {
   313  			case errch <- err:
   314  			default:
   315  				return
   316  			}
   317  			continue
   318  		}
   319  		gate.Start()
   320  		go func(albumName, albumTitle string) {
   321  			defer gate.Done()
   322  			for _, photo := range photos {
   323  				img := imageFile{
   324  					albumTitle: albumTitle,
   325  					albumName:  albumName,
   326  					fileName:   photo.Filename(),
   327  					ID:         photo.ID,
   328  				}
   329  				ok, err := filter(img)
   330  				if err != nil {
   331  					errch <- err
   332  					return
   333  				}
   334  				if !ok {
   335  					continue
   336  				}
   337  
   338  				img.r, err = picago.DownloadPhoto(client, photo.URL)
   339  				if err != nil {
   340  					select {
   341  					case errch <- fmt.Errorf("Get(%s): %v", photo.URL, err):
   342  					default:
   343  						return
   344  					}
   345  					continue
   346  				}
   347  				itemch <- img
   348  			}
   349  		}(album.Name, album.Title)
   350  	}
   351  }