github.com/slspeek/camlistore_namedsearch@v0.0.0-20140519202248-ed6f70f7721a/pkg/importer/flickr/flickr.go (about)

     1  /*
     2  Copyright 2013 The Camlistore Authors
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8       http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  // Package flickr implements an importer for flickr.com accounts.
    18  package flickr
    19  
    20  import (
    21  	"errors"
    22  	"fmt"
    23  	"log"
    24  	"net/http"
    25  	"net/url"
    26  	"strings"
    27  
    28  	"camlistore.org/pkg/context"
    29  	"camlistore.org/pkg/httputil"
    30  	"camlistore.org/pkg/importer"
    31  	"camlistore.org/pkg/jsonconfig"
    32  	"camlistore.org/pkg/schema"
    33  	"camlistore.org/third_party/github.com/garyburd/go-oauth/oauth"
    34  )
    35  
    36  const (
    37  	apiURL = "http://api.flickr.com/services/rest/"
    38  )
    39  
// init registers this importer with the importer package under the name
// "flickr", using newFromConfig as its constructor.
func init() {
	importer.Register("flickr", newFromConfig)
}
    43  
// imp is the Flickr importer. It holds the importer host it runs under and
// the Flickr user on whose behalf API requests are made.
type imp struct {
	host *importer.Host // host supplying the root object, HTTP client and blob target
	user *userInfo      // nil if the user isn't authenticated
}
    48  
    49  func newFromConfig(cfg jsonconfig.Obj, host *importer.Host) (importer.Importer, error) {
    50  	apiKey := cfg.RequiredString("apiKey")
    51  	if err := cfg.Validate(); err != nil {
    52  		return nil, err
    53  	}
    54  	parts := strings.Split(apiKey, ":")
    55  	if len(parts) != 2 {
    56  		return nil, fmt.Errorf("Flickr importer: Invalid apiKey configuration: %q", apiKey)
    57  	}
    58  	oauthClient.Credentials = oauth.Credentials{
    59  		Token:  parts[0],
    60  		Secret: parts[1],
    61  	}
    62  	return &imp{
    63  		host: host,
    64  	}, nil
    65  }
    66  
// CanHandleURL always reports false: this importer does not import individual URLs.
func (im *imp) CanHandleURL(url string) bool { return false }
// ImportURL is not supported and panics if called; CanHandleURL always
// returns false for this importer.
func (im *imp) ImportURL(url string) error   { panic("unused") }
    69  
    70  func (im *imp) Prefix() string {
    71  	// This should only get called when we're importing, so it's OK to
    72  	// assume we're authenticated.
    73  	return fmt.Sprintf("flickr:%s", im.user.Id)
    74  }
    75  
    76  func (im *imp) String() string {
    77  	// We use this in logging when we're not authenticated, so it should do
    78  	// something reasonable in that case.
    79  	userId := "<unauthenticated>"
    80  	if im.user != nil {
    81  		userId = im.user.Id
    82  	}
    83  	return fmt.Sprintf("flickr:%s", userId)
    84  }
    85  
    86  func (im *imp) Run(ctx *context.Context) error {
    87  	// TODO: plumb context through to following two calls and monitor it.
    88  	if err := im.importPhotosets(); err != nil {
    89  		return err
    90  	}
    91  	if err := im.importPhotos(); err != nil {
    92  		return err
    93  	}
    94  	return nil
    95  }
    96  
// photosetsGetList is the structure the JSON response of the
// flickr.photosets.getList API method is decoded into.
type photosetsGetList struct {
	Photosets struct {
		Page     int
		Pages    int
		Perpage  int
		Photoset []*photosetsGetListItem // the photosets contained in this response
	}
}
   105  
// photosetsGetListItem describes a single photoset entry of a
// photosetsGetList response.
type photosetsGetListItem struct {
	Id             string
	PrimaryPhotoId string `json:"primary"` // decoded from the "primary" JSON key
	Title          contentString
	Description    contentString
}
   112  
// photosetsGetPhotos is the structure the JSON response of the
// flickr.photosets.getPhotos API method is decoded into.
type photosetsGetPhotos struct {
	Photoset struct {
		Id    string
		Page  int `json:",string"` // decoded from a JSON string rather than a JSON number
		Pages int
		Photo []struct {
			Id             string
			Originalformat string // used as the file extension when naming photo nodes
		}
	}
}
   124  
   125  func (im *imp) importPhotosets() error {
   126  	resp := photosetsGetList{}
   127  	if err := im.flickrAPIRequest(&resp, "flickr.photosets.getList"); err != nil {
   128  		return err
   129  	}
   130  
   131  	setsNode, err := im.getTopLevelNode("sets", "Sets")
   132  	if err != nil {
   133  		return err
   134  	}
   135  	log.Printf("Importing %d sets", len(resp.Photosets.Photoset))
   136  
   137  	for _, item := range resp.Photosets.Photoset {
   138  		for page := 1; page >= 1; {
   139  			page, err = im.importPhotoset(setsNode, item, page)
   140  			if err != nil {
   141  				log.Printf("Flickr importer: error importing photoset %s: %s", item.Id, err)
   142  				continue
   143  			}
   144  		}
   145  	}
   146  	return nil
   147  }
   148  
   149  func (im *imp) importPhotoset(parent *importer.Object, photoset *photosetsGetListItem, page int) (int, error) {
   150  	photosetNode, err := parent.ChildPathObject(photoset.Id)
   151  	if err != nil {
   152  		return 0, err
   153  	}
   154  
   155  	if err := photosetNode.SetAttrs(
   156  		"flickrId", photoset.Title.Content,
   157  		"title", photoset.Title.Content,
   158  		"description", photoset.Description.Content,
   159  		"primaryPhotoId", photoset.PrimaryPhotoId); err != nil {
   160  		return 0, err
   161  	}
   162  
   163  	resp := photosetsGetPhotos{}
   164  	if err := im.flickrAPIRequest(&resp, "flickr.photosets.getPhotos",
   165  		"page", fmt.Sprintf("%d", page), "photoset_id", photoset.Id, "extras", "original_format"); err != nil {
   166  		return 0, err
   167  	}
   168  
   169  	log.Printf("Importing page %d from photoset %s", page, photoset.Id)
   170  
   171  	photosNode, err := im.getPhotosNode()
   172  	if err != nil {
   173  		return 0, err
   174  	}
   175  
   176  	for _, item := range resp.Photoset.Photo {
   177  		filename := fmt.Sprintf("%s.%s", item.Id, item.Originalformat)
   178  		photoNode, err := photosNode.ChildPathObject(filename)
   179  		if err != nil {
   180  			log.Printf("Flickr importer: error finding photo node %s for addition to photoset %s: %s",
   181  				item.Id, photoset.Id, err)
   182  			continue
   183  		}
   184  		if err := photosetNode.SetAttr("camliPath:"+filename, photoNode.PermanodeRef().String()); err != nil {
   185  			log.Printf("Flickr importer: error adding photo %s to photoset %s: %s",
   186  				item.Id, photoset.Id, err)
   187  		}
   188  	}
   189  
   190  	if resp.Photoset.Page < resp.Photoset.Pages {
   191  		return page + 1, nil
   192  	} else {
   193  		return 0, nil
   194  	}
   195  }
   196  
// photosSearch is the structure the JSON response of the
// flickr.people.getPhotos API method is decoded into.
type photosSearch struct {
	Photos struct {
		Page    int
		Pages   int
		Perpage int
		Total   int `json:",string"` // decoded from a JSON string rather than a JSON number
		Photo   []*photosSearchItem
	}

	Stat string
}
   208  
// photosSearchItem describes a single photo entry of a photosSearch response.
// Of these fields, importPhoto currently uses Id, Title, Description,
// Originalformat, Lastupdate and URL.
type photosSearchItem struct {
	Id             string
	Title          string
	Ispublic       int
	Isfriend       int
	Isfamily       int
	Description    contentString
	Dateupload     string
	Datetaken      string
	Originalformat string
	Lastupdate     string
	Latitude       float32
	Longitude      float32
	Tags           string
	Machinetags    string `json:"machine_tags"` // decoded from the "machine_tags" JSON key
	Views          string
	Media          string
	URL            string `json:"url_o"` // decoded from the "url_o" JSON key; the URL importPhoto downloads
}
   228  
   229  func (im *imp) importPhotos() error {
   230  	for page := 1; page >= 1; {
   231  		var err error
   232  		page, err = im.importPhotosPage(page)
   233  		if err != nil {
   234  			return err
   235  		}
   236  	}
   237  	return nil
   238  }
   239  
   240  func (im *imp) importPhotosPage(page int) (int, error) {
   241  	resp := photosSearch{}
   242  	if err := im.flickrAPIRequest(&resp, "flickr.people.getPhotos", "page", fmt.Sprintf("%d", page),
   243  		"extras", "description, date_upload, date_taken, original_format, last_update, geo, tags, machine_tags, views, media, url_o"); err != nil {
   244  		return 0, err
   245  	}
   246  
   247  	photosNode, err := im.getPhotosNode()
   248  	if err != nil {
   249  		return 0, err
   250  	}
   251  	log.Printf("Importing %d photos on page %d of %d", len(resp.Photos.Photo), page, resp.Photos.Pages)
   252  
   253  	for _, item := range resp.Photos.Photo {
   254  		if err := im.importPhoto(photosNode, item); err != nil {
   255  			log.Printf("Flickr importer: error importing %s: %s", item.Id, err)
   256  			continue
   257  		}
   258  	}
   259  
   260  	if resp.Photos.Pages > resp.Photos.Page {
   261  		return page + 1, nil
   262  	} else {
   263  		return 0, nil
   264  	}
   265  }
   266  
   267  // TODO(aa):
   268  // * Parallelize: http://golang.org/doc/effective_go.html#concurrency
   269  // * Do more than one "page" worth of results
   270  // * Report progress and errors back through host interface
   271  // * All the rest of the metadata (see photoMeta)
   272  // * Conflicts: For all metadata changes, prefer any non-imported claims
   273  // * Test!
   274  func (im *imp) importPhoto(parent *importer.Object, photo *photosSearchItem) error {
   275  	filename := fmt.Sprintf("%s.%s", photo.Id, photo.Originalformat)
   276  	photoNode, err := parent.ChildPathObject(filename)
   277  	if err != nil {
   278  		return err
   279  	}
   280  
   281  	// Import all the metadata. SetAttrs() is a no-op if the value hasn't changed, so there's no cost to doing these on every run.
   282  	// And this way if we add more things to import, they will get picked up.
   283  	if err := photoNode.SetAttrs(
   284  		"flickrId", photo.Id,
   285  		"title", photo.Title,
   286  		"description", photo.Description.Content); err != nil {
   287  		return err
   288  	}
   289  
   290  	// Import the photo itself. Since it is expensive to fetch the image, we store its lastupdate and only refetch if it might have changed.
   291  	if photoNode.Attr("flickrLastupdate") == photo.Lastupdate {
   292  		return nil
   293  	}
   294  	res, err := im.flickrRequest(photo.URL, url.Values{})
   295  	if err != nil {
   296  		log.Printf("Flickr importer: Could not fetch %s: %s", photo.URL, err)
   297  		return err
   298  	}
   299  	defer res.Body.Close()
   300  
   301  	fileRef, err := schema.WriteFileFromReader(im.host.Target(), filename, res.Body)
   302  	if err != nil {
   303  		return err
   304  	}
   305  	if err := photoNode.SetAttr("camliContent", fileRef.String()); err != nil {
   306  		return err
   307  	}
   308  	// Write lastupdate last, so that if any of the preceding fails, we will try again next time.
   309  	if err := photoNode.SetAttr("flickrLastupdate", photo.Lastupdate); err != nil {
   310  		return err
   311  	}
   312  
   313  	return nil
   314  }
   315  
// getPhotosNode returns the top-level node at path "photos" (titled "Photos")
// under which individual photos are stored.
func (im *imp) getPhotosNode() (*importer.Object, error) {
	return im.getTopLevelNode("photos", "Photos")
}
   319  
   320  func (im *imp) getTopLevelNode(path string, title string) (*importer.Object, error) {
   321  	root, err := im.getRootNode()
   322  	if err != nil {
   323  		return nil, err
   324  	}
   325  
   326  	photos, err := root.ChildPathObject(path)
   327  	if err != nil {
   328  		return nil, err
   329  	}
   330  
   331  	if err := photos.SetAttr("title", title); err != nil {
   332  		return nil, err
   333  	}
   334  	return photos, nil
   335  }
   336  
   337  func (im *imp) getRootNode() (*importer.Object, error) {
   338  	root, err := im.host.RootObject()
   339  	if err != nil {
   340  		return nil, err
   341  	}
   342  
   343  	if root.Attr("title") == "" {
   344  		title := fmt.Sprintf("Flickr (%s)", im.user.Username)
   345  		if err := root.SetAttr("title", title); err != nil {
   346  			return nil, err
   347  		}
   348  	}
   349  	return root, nil
   350  }
   351  
   352  func (im *imp) flickrAPIRequest(result interface{}, method string, keyval ...string) error {
   353  	if len(keyval)%2 == 1 {
   354  		panic("Incorrect number of keyval arguments")
   355  	}
   356  
   357  	if im.user == nil {
   358  		return fmt.Errorf("No authenticated user")
   359  	}
   360  
   361  	form := url.Values{}
   362  	form.Set("method", method)
   363  	form.Set("format", "json")
   364  	form.Set("nojsoncallback", "1")
   365  	form.Set("user_id", im.user.Id)
   366  	for i := 0; i < len(keyval); i += 2 {
   367  		form.Set(keyval[i], keyval[i+1])
   368  	}
   369  
   370  	res, err := im.flickrRequest(apiURL, form)
   371  	if err != nil {
   372  		return err
   373  	}
   374  	err = httputil.DecodeJSON(res, result)
   375  	if err != nil {
   376  		log.Printf("Error parsing response for %s: %s", apiURL, err)
   377  	}
   378  	return err
   379  }
   380  
   381  func (im *imp) flickrRequest(url string, form url.Values) (*http.Response, error) {
   382  	if im.user == nil {
   383  		return nil, errors.New("Not logged in. Go to /importer-flickr/login.")
   384  	}
   385  
   386  	res, err := oauthClient.Get(im.host.HTTPClient(), im.user.Cred, url, form)
   387  	if err != nil {
   388  		return nil, err
   389  	}
   390  
   391  	if res.StatusCode != http.StatusOK {
   392  		return nil, fmt.Errorf("Auth request failed with: %s", res.Status)
   393  	}
   394  
   395  	return res, nil
   396  }
   397  
// contentString decodes a JSON object of the form {"_content": "..."} and
// exposes the wrapped text as Content. It is used for the title and
// description fields of the response types in this file.
type contentString struct {
	Content string `json:"_content"`
}