github.com/olivere/camlistore@v0.0.0-20140121221811-1b7ac2da0199/pkg/importer/flickr/flickr.go (about)

     1  /*
     2  Copyright 2013 The Camlistore Authors
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8       http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  // Package flickr implements an importer for flickr.com accounts.
    18  package flickr
    19  
    20  import (
    21  	"encoding/json"
    22  	"errors"
    23  	"fmt"
    24  	"io/ioutil"
    25  	"log"
    26  	"net/http"
    27  	"net/url"
    28  	"strings"
    29  
    30  	"camlistore.org/pkg/importer"
    31  	"camlistore.org/pkg/jsonconfig"
    32  	"camlistore.org/pkg/schema"
    33  	"camlistore.org/third_party/github.com/garyburd/go-oauth/oauth"
    34  )
    35  
    36  const (
    37  	apiURL = "http://api.flickr.com/services/rest/"
    38  )
    39  
// init registers this importer's constructor, newFromConfig, with the
// importer package under the name "flickr".
func init() {
	importer.Register("flickr", newFromConfig)
}
    43  
// imp is the Flickr importer. newFromConfig returns it as an
// importer.Importer.
type imp struct {
	// host is the importer.Host handed to newFromConfig. The importer asks
	// it for the root object, the HTTP client and the target that fetched
	// files are written to.
	host *importer.Host
	user *userInfo // nil if the user isn't authenticated
}
    48  
    49  func newFromConfig(cfg jsonconfig.Obj, host *importer.Host) (importer.Importer, error) {
    50  	apiKey := cfg.RequiredString("apiKey")
    51  	if err := cfg.Validate(); err != nil {
    52  		return nil, err
    53  	}
    54  	parts := strings.Split(apiKey, ":")
    55  	if len(parts) != 2 {
    56  		return nil, fmt.Errorf("Flickr importer: Invalid apiKey configuration: %q", apiKey)
    57  	}
    58  	oauthClient.Credentials = oauth.Credentials{
    59  		Token:  parts[0],
    60  		Secret: parts[1],
    61  	}
    62  	return &imp{
    63  		host: host,
    64  	}, nil
    65  }
    66  
    67  func (im *imp) CanHandleURL(url string) bool { return false }
    68  func (im *imp) ImportURL(url string) error   { panic("unused") }
    69  
    70  func (im *imp) Prefix() string {
    71  	// This should only get called when we're importing, so it's OK to
    72  	// assume we're authenticated.
    73  	return fmt.Sprintf("flickr:%s", im.user.Id)
    74  }
    75  
    76  func (im *imp) String() string {
    77  	// We use this in logging when we're not authenticated, so it should do
    78  	// something reasonable in that case.
    79  	userId := "<unauthenticated>"
    80  	if im.user != nil {
    81  		userId = im.user.Id
    82  	}
    83  	return fmt.Sprintf("flickr:%s", userId)
    84  }
    85  
    86  func (im *imp) Run(intr importer.Interrupt) error {
    87  	if err := im.importPhotosets(); err != nil {
    88  		return err
    89  	}
    90  	if err := im.importPhotos(); err != nil {
    91  		return err
    92  	}
    93  	return nil
    94  }
    95  
// photosetsGetList is the structure the JSON response of the
// flickr.photosets.getList API method is decoded into.
type photosetsGetList struct {
	Photosets struct {
		// Page, Pages and Perpage are decoded but not read by this file;
		// presumably they describe pagination of the list.
		Page     int
		Pages    int
		Perpage  int
		// Photoset holds the photosets that importPhotosets iterates over.
		Photoset []*photosetsGetListItem
	}
}
   104  
// photosetsGetListItem is a single photoset entry of a photosetsGetList
// response.
type photosetsGetListItem struct {
	Id             string
	// PrimaryPhotoId is decoded from the "primary" JSON key.
	PrimaryPhotoId string `json:"primary"`
	// Title and Description arrive wrapped in an object with a "_content" key.
	Title          contentString
	Description    contentString
}
   111  
// photosetsGetPhotos is the structure the JSON response of the
// flickr.photosets.getPhotos API method is decoded into.
type photosetsGetPhotos struct {
	Photoset struct {
		Id    string
		// Page is decoded from a JSON string (note the ",string" option),
		// whereas Pages is decoded from a JSON number.
		Page  int `json:",string"`
		Pages int
		// Photo lists the photos on this page of the photoset. Id and
		// Originalformat together form the "<id>.<format>" path under which
		// importPhotoset looks each photo up.
		Photo []struct {
			Id             string
			Originalformat string
		}
	}
}
   123  
   124  func (im *imp) importPhotosets() error {
   125  	resp := photosetsGetList{}
   126  	if err := im.flickrAPIRequest(&resp, "flickr.photosets.getList"); err != nil {
   127  		return err
   128  	}
   129  
   130  	setsNode, err := im.getTopLevelNode("sets", "Sets")
   131  	if err != nil {
   132  		return err
   133  	}
   134  	log.Printf("Importing %d sets", len(resp.Photosets.Photoset))
   135  
   136  	for _, item := range resp.Photosets.Photoset {
   137  		for page := 1; page >= 1; {
   138  			page, err = im.importPhotoset(setsNode, item, page)
   139  			if err != nil {
   140  				log.Printf("Flickr importer: error importing photoset %s: %s", item.Id, err)
   141  				continue
   142  			}
   143  		}
   144  	}
   145  	return nil
   146  }
   147  
   148  func (im *imp) importPhotoset(parent *importer.Object, photoset *photosetsGetListItem, page int) (int, error) {
   149  	photosetNode, err := parent.ChildPathObject(photoset.Id)
   150  	if err != nil {
   151  		return 0, err
   152  	}
   153  
   154  	if err := photosetNode.SetAttrs(
   155  		"flickrId", photoset.Title.Content,
   156  		"title", photoset.Title.Content,
   157  		"description", photoset.Description.Content,
   158  		"primaryPhotoId", photoset.PrimaryPhotoId); err != nil {
   159  		return 0, err
   160  	}
   161  
   162  	resp := photosetsGetPhotos{}
   163  	if err := im.flickrAPIRequest(&resp, "flickr.photosets.getPhotos",
   164  		"page", fmt.Sprintf("%d", page), "photoset_id", photoset.Id, "extras", "original_format"); err != nil {
   165  		return 0, err
   166  	}
   167  
   168  	log.Printf("Importing page %d from photoset %s", page, photoset.Id)
   169  
   170  	photosNode, err := im.getPhotosNode()
   171  	if err != nil {
   172  		return 0, err
   173  	}
   174  
   175  	for _, item := range resp.Photoset.Photo {
   176  		filename := fmt.Sprintf("%s.%s", item.Id, item.Originalformat)
   177  		photoNode, err := photosNode.ChildPathObject(filename)
   178  		if err != nil {
   179  			log.Printf("Flickr importer: error finding photo node %s for addition to photoset %s: %s",
   180  				item.Id, photoset.Id, err)
   181  			continue
   182  		}
   183  		if err := photosetNode.SetAttr("camliPath:"+filename, photoNode.PermanodeRef().String()); err != nil {
   184  			log.Printf("Flickr importer: error adding photo %s to photoset %s: %s",
   185  				item.Id, photoset.Id, err)
   186  		}
   187  	}
   188  
   189  	if resp.Photoset.Page < resp.Photoset.Pages {
   190  		return page + 1, nil
   191  	} else {
   192  		return 0, nil
   193  	}
   194  }
   195  
// photosSearch is the structure the JSON response of the
// flickr.people.getPhotos API method is decoded into.
type photosSearch struct {
	Photos struct {
		// Page and Pages decide in importPhotosPage whether another page
		// has to be fetched.
		Page    int
		Pages   int
		Perpage int
		// Total is decoded from a JSON string (note the ",string" option).
		Total   int `json:",string"`
		Photo   []*photosSearchItem
	}

	// Stat is decoded from the response; no code visible in this file
	// inspects it.
	Stat string
}
   207  
// photosSearchItem is a single photo entry of a photosSearch response. Most
// fields correspond to the "extras" that importPhotosPage asks for; so far
// importPhoto only uses Id, Title, Description, Originalformat, Lastupdate
// and URL.
type photosSearchItem struct {
	Id             string
	Title          string
	Ispublic       int
	Isfriend       int
	Isfamily       int
	Description    contentString
	Dateupload     string
	Datetaken      string
	Originalformat string
	// Lastupdate is stored on the photo node by importPhoto and compared on
	// later runs to decide whether the image has to be fetched again.
	Lastupdate     string
	Latitude       float32
	Longitude      float32
	Tags           string
	Machinetags    string `json:"machine_tags"`
	Views          string
	Media          string
	// URL is decoded from the "url_o" key; importPhoto downloads the image
	// from it.
	URL            string `json:"url_o"`
}
   227  
   228  func (im *imp) importPhotos() error {
   229  	for page := 1; page >= 1; {
   230  		var err error
   231  		page, err = im.importPhotosPage(page)
   232  		if err != nil {
   233  			return err
   234  		}
   235  	}
   236  	return nil
   237  }
   238  
   239  func (im *imp) importPhotosPage(page int) (int, error) {
   240  	resp := photosSearch{}
   241  	if err := im.flickrAPIRequest(&resp, "flickr.people.getPhotos", "page", fmt.Sprintf("%d", page),
   242  		"extras", "description, date_upload, date_taken, original_format, last_update, geo, tags, machine_tags, views, media, url_o"); err != nil {
   243  		return 0, err
   244  	}
   245  
   246  	photosNode, err := im.getPhotosNode()
   247  	if err != nil {
   248  		return 0, err
   249  	}
   250  	log.Printf("Importing %d photos on page %d of %d", len(resp.Photos.Photo), page, resp.Photos.Pages)
   251  
   252  	for _, item := range resp.Photos.Photo {
   253  		if err := im.importPhoto(photosNode, item); err != nil {
   254  			log.Printf("Flickr importer: error importing %s: %s", item.Id, err)
   255  			continue
   256  		}
   257  	}
   258  
   259  	if resp.Photos.Pages > resp.Photos.Page {
   260  		return page + 1, nil
   261  	} else {
   262  		return 0, nil
   263  	}
   264  }
   265  
   266  // TODO(aa):
   267  // * Parallelize: http://golang.org/doc/effective_go.html#concurrency
   268  // * Do more than one "page" worth of results
   269  // * Report progress and errors back through host interface
   270  // * All the rest of the metadata (see photoMeta)
   271  // * Conflicts: For all metadata changes, prefer any non-imported claims
   272  // * Test!
   273  func (im *imp) importPhoto(parent *importer.Object, photo *photosSearchItem) error {
   274  	filename := fmt.Sprintf("%s.%s", photo.Id, photo.Originalformat)
   275  	photoNode, err := parent.ChildPathObject(filename)
   276  	if err != nil {
   277  		return err
   278  	}
   279  
   280  	// Import all the metadata. SetAttrs() is a no-op if the value hasn't changed, so there's no cost to doing these on every run.
   281  	// And this way if we add more things to import, they will get picked up.
   282  	if err := photoNode.SetAttrs(
   283  		"flickrId", photo.Id,
   284  		"title", photo.Title,
   285  		"description", photo.Description.Content); err != nil {
   286  		return err
   287  	}
   288  
   289  	// Import the photo itself. Since it is expensive to fetch the image, we store its lastupdate and only refetch if it might have changed.
   290  	if photoNode.Attr("flickrLastupdate") == photo.Lastupdate {
   291  		return nil
   292  	}
   293  	res, err := im.flickrRequest(photo.URL, url.Values{})
   294  	if err != nil {
   295  		log.Printf("Flickr importer: Could not fetch %s: %s", photo.URL, err)
   296  		return err
   297  	}
   298  	defer res.Body.Close()
   299  
   300  	fileRef, err := schema.WriteFileFromReader(im.host.Target(), filename, res.Body)
   301  	if err != nil {
   302  		return err
   303  	}
   304  	if err := photoNode.SetAttr("camliContent", fileRef.String()); err != nil {
   305  		return err
   306  	}
   307  	// Write lastupdate last, so that if any of the preceding fails, we will try again next time.
   308  	if err := photoNode.SetAttr("flickrLastupdate", photo.Lastupdate); err != nil {
   309  		return err
   310  	}
   311  
   312  	return nil
   313  }
   314  
   315  func (im *imp) getPhotosNode() (*importer.Object, error) {
   316  	return im.getTopLevelNode("photos", "Photos")
   317  }
   318  
   319  func (im *imp) getTopLevelNode(path string, title string) (*importer.Object, error) {
   320  	root, err := im.getRootNode()
   321  	if err != nil {
   322  		return nil, err
   323  	}
   324  
   325  	photos, err := root.ChildPathObject(path)
   326  	if err != nil {
   327  		return nil, err
   328  	}
   329  
   330  	if err := photos.SetAttr("title", title); err != nil {
   331  		return nil, err
   332  	}
   333  	return photos, nil
   334  }
   335  
   336  func (im *imp) getRootNode() (*importer.Object, error) {
   337  	root, err := im.host.RootObject()
   338  	if err != nil {
   339  		return nil, err
   340  	}
   341  
   342  	if root.Attr("title") == "" {
   343  		title := fmt.Sprintf("Flickr (%s)", im.user.Username)
   344  		if err := root.SetAttr("title", title); err != nil {
   345  			return nil, err
   346  		}
   347  	}
   348  	return root, nil
   349  }
   350  
   351  func (im *imp) flickrAPIRequest(result interface{}, method string, keyval ...string) error {
   352  	if len(keyval)%2 == 1 {
   353  		panic("Incorrect number of keyval arguments")
   354  	}
   355  
   356  	if im.user == nil {
   357  		return fmt.Errorf("No authenticated user")
   358  	}
   359  
   360  	form := url.Values{}
   361  	form.Set("method", method)
   362  	form.Set("format", "json")
   363  	form.Set("nojsoncallback", "1")
   364  	form.Set("user_id", im.user.Id)
   365  	for i := 0; i < len(keyval); i += 2 {
   366  		form.Set(keyval[i], keyval[i+1])
   367  	}
   368  
   369  	res, err := im.flickrRequest(apiURL, form)
   370  	if err != nil {
   371  		return err
   372  	}
   373  	defer res.Body.Close()
   374  	data, err := ioutil.ReadAll(res.Body)
   375  	if err != nil {
   376  		return err
   377  	}
   378  	err = json.Unmarshal(data, result)
   379  	if err != nil {
   380  		log.Println("Response data:", string(data))
   381  	}
   382  	return err
   383  }
   384  
   385  func (im *imp) flickrRequest(url string, form url.Values) (*http.Response, error) {
   386  	if im.user == nil {
   387  		return nil, errors.New("Not logged in. Go to /importer-flickr/login.")
   388  	}
   389  
   390  	res, err := oauthClient.Get(im.host.HTTPClient(), im.user.Cred, url, form)
   391  	if err != nil {
   392  		return nil, err
   393  	}
   394  
   395  	if res.StatusCode != http.StatusOK {
   396  		return nil, fmt.Errorf("Auth request failed with: %s", res.Status)
   397  	}
   398  
   399  	return res, nil
   400  }
   401  
// contentString decodes a JSON object whose text is held under the
// "_content" key; the photo and photoset title/description fields in this
// file use it.
type contentString struct {
	Content string `json:"_content"`
}