github.com/aarzilli/tools@v0.0.0-20151123112009-0d27094f75e0/net/http/repo/0_doc.go (about)

     1  // Package repo accepts JSON commands over HTTP,
     2  // downloads HTML files in parallel from the designated source,
     3  // and makes them available via a quasi-static HTTP file server.
     4  package repo
     5  
     6  //
     7  // Further inspiration could be taken from github.com/lox/httpcache
     8  //
     9  //  Todo:
    10  //		Re-integrate RSS feeds into the crawling
    11  //
    12  //
    13  
    14  import (
    15  	"fmt"
    16  	"log"
    17  )
    18  
    19  /*
    20  
    21  [{ 	"Host":           "www.handelsblatt.com",
    22   	"SearchPrefix":   "/politik/international",
    23   	"RssXMLURI":      "/contentexport/feed/schlagzeilen"
    24  }]
    25  
    26  
    27  
    28  curl -X POST -d "[{ \"Host\": \"www.handelsblatt.com\",  \"SearchPrefix\":  \"/politik/deutschland\"         }]"  localhost:8085/fetch/command-receive
    29  curl -X POST -d "[{ \"Host\": \"www.welt.de\"         ,  \"SearchPrefix\":  \"/wirtschaft/deutschland\"      }]"  localhost:8085/fetch/command-receive
    30  curl -X POST -d "[{ \"Host\": \"www.economist.com\"   ,  \"SearchPrefix\":  \"/news/business-and-finance\"   }]"  localhost:8085/fetch/command-receive
    31  
    32  curl -X POST -d "[{ \"Host\": \"test.economist.com\"  ,  \"SearchPrefix\":  \"/news/business-and-finance\"   }]"  localhost:8085/fetch/command-receive
    33  curl -X POST -d "[{ \"Host\": \"test.economist.com\"  ,  \"SearchPrefix\":  \"/\"                            }]"  localhost:8085/fetch/command-receive
    34  
    35  curl -X POST -d "[{ \"Host\": \"www.welt.de\",           \"SearchPrefix\": \"/wirtschaft/deutschland\" ,  \"RssXMLURI\": \"/wirtschaft/?service=Rss\" }]" localhost:8085/fetch/command-receive
    36  
    37  
    38  curl localhost:8085/fetch/similar?uri-x=www.welt.de/politik/ausland/article146154432/Tuerkische-Bodentruppen-marschieren-im-Nordirak-ein.html
    39  
    40  curl --data url-x=a.com  localhost:8085/fetch/similar
    41  curl --data url-x=https://www.welt.de/politik/ausland/article146154432/Tuerkische-Bodentruppen-marschieren-im-Nordirak-ein.html  localhost:8085/fetch/similar
    42  curl --data url-x=http://www.economist.com/news/britain/21663648-hard-times-hard-hats-making-britain-make-things-again-proving-difficult  localhost:8085/fetch/similar
    43  curl --data url-x=http://www.economist.com/news/americas/21661804-gender-equality-good-economic-growth-girl-power  localhost:8085/fetch/similar
    44  
    45  curl --data "cnt=1&url-x=http://www.economist.com/news/americas/21661804-gender-equality-good-economic-growth-girl-power"  localhost:8085/fetch/similar
    46  
    47  */
    48  
    49  var pf = fmt.Printf
    50  var pfRestore = fmt.Printf
    51  
    52  var spf = fmt.Sprintf
    53  var wpf = fmt.Fprintf
    54  
    55  var lpf = log.Printf