github.com/aarzilli/tools@v0.0.0-20151123112009-0d27094f75e0/net/http/domclean2/t_parsing_test.go (about)

     1  // +build parsing
     2  // go test -tags=parsing
     3  
     4  package domclean2
     5  
     6  import (
     7  	"fmt"
     8  	"log"
     9  	"net/http"
    10  	"os"
    11  	"path"
    12  	"testing"
    13  	"time"
    14  
    15  	"appengine/aetest"
    16  
    17  	"github.com/pbberlin/tools/net/http/fetch"
    18  	"github.com/pbberlin/tools/net/http/fileserver"
    19  	"github.com/pbberlin/tools/net/http/loghttp"
    20  	"github.com/pbberlin/tools/net/http/repo"
    21  	"github.com/pbberlin/tools/net/http/routes"
    22  	"github.com/pbberlin/tools/sort/sortmap"
    23  	"github.com/pbberlin/tools/stringspb"
    24  )
    25  
    26  const numTotal = 3 // comparable html docs
    27  const stageMax = 3 // weedstages
    28  
    29  const cTestHostOwn = "localhost:63222"
    30  
    31  var hostWithPref = routes.AppHost() + repo.UriMountNameY
    32  
    33  func prepare(t *testing.T) aetest.Context {
    34  
    35  	lg, lge := loghttp.Logger(nil, nil)
    36  	_ = lg
    37  
    38  	c, err := aetest.NewContext(nil)
    39  	if err != nil {
    40  		lge(err)
    41  		t.Fatal(err)
    42  	}
    43  
    44  	serveFile := func(w http.ResponseWriter, r *http.Request, m map[string]interface{}) {
    45  		fs1 := repo.GetFS(c)
    46  		fileserver.FsiFileServer(w, r, fileserver.Options{FS: fs1, Prefix: repo.UriMountNameY})
    47  	}
    48  	http.HandleFunc(repo.UriMountNameY, loghttp.Adapter(serveFile))
    49  
    50  	go func() {
    51  		log.Fatal(
    52  			http.ListenAndServe(cTestHostOwn, nil),
    53  		)
    54  	}()
    55  
    56  	return c
    57  
    58  }
    59  
    60  func Test1(t *testing.T) {
    61  
    62  	lg, lge := loghttp.Logger(nil, nil)
    63  
    64  	// c := prepare(t)
    65  	// defer c.Close()
    66  
    67  	lg("waiting for webserver")
    68  	time.Sleep(2 * time.Millisecond)
    69  
    70  	remoteHostname := "www.welt.de"
    71  
    72  	dirs1, _, msg, err := fileserver.GetDirContents(hostWithPref, remoteHostname)
    73  	if err != nil {
    74  		lge(err)
    75  		lg("%s", msg)
    76  	}
    77  
    78  	lg("dirs1")
    79  	for _, v := range dirs1 {
    80  		lg("    %v", v)
    81  	}
    82  
    83  	least3Files := []string{}
    84  	for _, v1 := range dirs1 {
    85  
    86  		dirs2, fils2, msg, err := fileserver.GetDirContents(hostWithPref, path.Join(remoteHostname, v1))
    87  		_ = dirs2
    88  		if err != nil {
    89  			lge(err)
    90  			lg("%s", msg)
    91  		}
    92  		// lg("  dirs2 %v", stringspb.IndentedDump(dirs2))
    93  		// lg("  fils2 %v", stringspb.IndentedDump(fils2))
    94  
    95  		if len(fils2) > numTotal-1 {
    96  			for i2, v2 := range fils2 {
    97  				least3Files = append(least3Files, path.Join(remoteHostname, v1, v2))
    98  				if i2 == numTotal-1 {
    99  					break
   100  				}
   101  			}
   102  			break
   103  		}
   104  	}
   105  
   106  	if len(least3Files) < numTotal {
   107  		lg("not enough files in rss fetcher cache")
   108  		return
   109  	}
   110  
   111  	lg("fils2")
   112  	for _, v := range least3Files {
   113  		lg("    %v", v)
   114  	}
   115  
   116  	logdir := prepareLogDir()
   117  
   118  	iter := make([]int, numTotal)
   119  
   120  	for i, _ := range iter {
   121  
   122  		surl := spf("%v/%v", hostWithPref, least3Files[i])
   123  
   124  		fNamer := FileNamer(logdir, i)
   125  		fnKey := fNamer() // first call yields key
   126  		_ = fnKey
   127  
   128  		resBytes, effUrl, err := fetch.UrlGetter(nil, fetch.Options{URL: surl})
   129  		if err != nil {
   130  			lge(err)
   131  			return
   132  		}
   133  		lg("fetched %4.1fkB from %v", float64(len(resBytes))/1024, stringspb.ToLenR(effUrl.String(), 60))
   134  		opts := CleaningOptions{Proxify: true}
   135  		opts.FNamer = fNamer
   136  		opts.RemoteHost = remoteHostname
   137  		doc, err := DomClean(resBytes, opts)
   138  		lge(err)
   139  		_ = doc
   140  
   141  	}
   142  
   143  	// statistics on elements and attributes
   144  	sorted1 := sortmap.SortMapByCount(attrDistinct)
   145  	sorted1.Print(6)
   146  	fmt.Println()
   147  	sorted2 := sortmap.SortMapByCount(nodeDistinct)
   148  	sorted2.Print(6)
   149  
   150  	pf("correct finish\n")
   151  
   152  }
   153  
   154  func prepareLogDir() string {
   155  
   156  	lg, lge := loghttp.Logger(nil, nil)
   157  
   158  	logdir := "outp"
   159  	lg("logdir is %v ", logdir)
   160  
   161  	// sweep previous
   162  	rmPath := spf("./%v/", logdir)
   163  	err := os.RemoveAll(rmPath)
   164  	if err != nil {
   165  		lge(err)
   166  		os.Exit(1)
   167  	}
   168  	lg("removed %q", rmPath)
   169  
   170  	// create anew
   171  	err = os.Mkdir(logdir, 0755)
   172  	if err != nil && !os.IsExist(err) {
   173  		lge(err)
   174  		os.Exit(1)
   175  	}
   176  
   177  	return logdir
   178  
   179  }