github.com/apache/beam/sdks/v2@v2.48.2/go/examples/stringsplit/stringsplit.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one or more
     2  // contributor license agreements.  See the NOTICE file distributed with
     3  // this work for additional information regarding copyright ownership.
     4  // The ASF licenses this file to You under the Apache License, Version 2.0
     5  // (the "License"); you may not use this file except in compliance with
     6  // the License.  You may obtain a copy of the License at
     7  //
     8  //    http://www.apache.org/licenses/LICENSE-2.0
     9  //
    10  // Unless required by applicable law or agreed to in writing, software
    11  // distributed under the License is distributed on an "AS IS" BASIS,
    12  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  // See the License for the specific language governing permissions and
    14  // limitations under the License.
    15  
    16  // An example of using a Splittable DoFn in the Go SDK with a portable runner.
    17  //
    18  // The following instructions describe how to execute this example in the
    19  // Flink local runner.
    20  //
    21  // 1. From a command line, navigate to the top-level beam/ directory and run
    22  // the Flink job server:
    23  //
    24  //	./gradlew :runners:flink:1.13:job-server:runShadow -Djob-host=localhost -Dflink-master=local
    25  //
    26  // 2. The job server is ready to receive jobs once it outputs a log like the
    27  // following: `JobService started on localhost:8099`. Take note of the endpoint
    28  // in that log message.
    29  //
    30  // 3. While the job server is running in one command line window, create a
    31  // second one in the same directory and run this example with the following
    32  // command, using the endpoint you noted from step 2:
    33  //
    34  //	go run sdks/go/examples/stringsplit/stringsplit.go --runner=universal --endpoint=localhost:8099
    35  //
    36  // 4. Once the pipeline is complete, the job server can be closed with ctrl+C.
    37  // To check the output of the pipeline, search the job server logs for the
    38  // phrase "StringSplit Output".
    39  package main
    40  
    41  // beam-playground:
    42  //   name: StringSplit
    43  //   description: An example of using a Splittable DoFn in the Go SDK with a portable runner.
    44  //   multifile: false
    45  //   context_line: 61
    46  //   categories:
    47  //     - Debugging
    48  //     - Flatten
    49  //   complexity: MEDIUM
    50  //   tags:
    51  //     - pipeline
    52  //     - split
    53  //     - runner
    54  
    55  import (
    56  	"context"
    57  	"flag"
    58  	"time"
    59  
    60  	"github.com/apache/beam/sdks/v2/go/pkg/beam"
    61  	"github.com/apache/beam/sdks/v2/go/pkg/beam/core/sdf"
    62  	"github.com/apache/beam/sdks/v2/go/pkg/beam/io/rtrackers/offsetrange"
    63  	"github.com/apache/beam/sdks/v2/go/pkg/beam/log"
    64  	"github.com/apache/beam/sdks/v2/go/pkg/beam/register"
    65  	"github.com/apache/beam/sdks/v2/go/pkg/beam/x/beamx"
    66  )
    67  
    68  func init() {
    69  	register.DoFn4x0[context.Context, *sdf.LockRTracker, string, func(string)](&StringSplitFn{})
    70  	register.DoFn2x0[context.Context, string](&LogFn{})
    71  	register.Emitter1[string]()
    72  }
    73  
    74  // StringSplitFn is a Splittable DoFn that splits strings into substrings of the
    75  // specified size (for example, to be able to fit them in a small buffer).
    76  // See ProcessElement for more details.
    77  type StringSplitFn struct {
    78  	BufSize int64
    79  }
    80  
    81  // CreateInitialRestriction creates an offset range restriction for each element
    82  // with the size of the restriction corresponding to the length of the string.
    83  func (fn *StringSplitFn) CreateInitialRestriction(s string) offsetrange.Restriction {
    84  	rest := offsetrange.Restriction{Start: 0, End: int64(len(s))}
    85  	log.Debugf(context.Background(), "StringSplit CreateInitialRestriction: %v", rest)
    86  	return rest
    87  }
    88  
    89  // SplitRestriction performs initial splits so that each restriction is split
    90  // into 5.
    91  func (fn *StringSplitFn) SplitRestriction(_ string, rest offsetrange.Restriction) []offsetrange.Restriction {
    92  	splits := rest.EvenSplits(5)
    93  	log.Debugf(context.Background(), "StringSplit SplitRestrictions: %v -> %v", rest, splits)
    94  	return splits
    95  }
    96  
    97  // RestrictionSize returns the size as the difference between the restriction's
    98  // start and end.
    99  func (fn *StringSplitFn) RestrictionSize(_ string, rest offsetrange.Restriction) float64 {
   100  	size := rest.Size()
   101  	log.Debugf(context.Background(), "StringSplit RestrictionSize: %v -> %v", rest, size)
   102  	return size
   103  }
   104  
   105  // CreateTracker creates an offset range restriction tracker out of the offset
   106  // range restriction, and wraps it a thread-safe restriction tracker.
   107  func (fn *StringSplitFn) CreateTracker(rest offsetrange.Restriction) *sdf.LockRTracker {
   108  	return sdf.NewLockRTracker(offsetrange.NewTracker(rest))
   109  }
   110  
   111  // ProcessElement splits a string into substrings of a specified size (set in
   112  // StringSplitFn.BufSize).
   113  //
   114  // Note that the substring blocks are not guaranteed to line up with the
   115  // restriction boundaries. ProcessElement is expected to emit any substring
   116  // block that begins in its restriction, even if it extends past the end of the
   117  // restriction.
   118  //
   119  // Example: If BufSize is 100, then a restriction of 75 to 325 should emit the
   120  // following substrings: [100, 200], [200, 300], [300, 400]
   121  func (fn *StringSplitFn) ProcessElement(ctx context.Context, rt *sdf.LockRTracker, elem string, emit func(string)) {
   122  	log.Debugf(ctx, "StringSplit ProcessElement: Tracker = %v", rt)
   123  	i := rt.GetRestriction().(offsetrange.Restriction).Start
   124  	if rem := i % fn.BufSize; rem != 0 {
   125  		i += fn.BufSize - rem // Skip to next multiple of BufSize.
   126  	}
   127  	strEnd := int64(len(elem))
   128  
   129  	for rt.TryClaim(i) == true {
   130  		if i+fn.BufSize > strEnd {
   131  			emit(elem[i:])
   132  		} else {
   133  			emit(elem[i : i+fn.BufSize])
   134  		}
   135  		i += fn.BufSize
   136  	}
   137  }
   138  
   139  // LogFn is a DoFn to log our split output.
   140  type LogFn struct{}
   141  
   142  // ProcessElement logs each element it receives.
   143  func (fn *LogFn) ProcessElement(ctx context.Context, in string) {
   144  	log.Infof(ctx, "StringSplit Output:\n%v", in)
   145  }
   146  
   147  // FinishBundle waits a bit so the job server finishes receiving logs.
   148  func (fn *LogFn) FinishBundle() {
   149  	time.Sleep(2 * time.Second)
   150  }
   151  
   152  // Use our StringSplitFn to split Shakespeare monologues into substrings and
   153  // output them.
   154  func main() {
   155  	flag.Parse()
   156  	beam.Init()
   157  
   158  	ctx := context.Background()
   159  
   160  	p := beam.NewPipeline()
   161  	s := p.Root()
   162  
   163  	monologues := beam.Create(s, macbeth, juliet, helena)
   164  	split := beam.ParDo(s, &StringSplitFn{50}, monologues)
   165  	beam.ParDo0(s, &LogFn{}, split)
   166  
   167  	if err := beamx.Run(ctx, p); err != nil {
   168  		log.Fatalf(ctx, "Failed to execute job: %v", err)
   169  	}
   170  }
   171  
// macbeth is one of the Shakespeare monologues that main feeds into the
// pipeline as an input element.
var macbeth = `Is this a dagger which I see before me,
The handle toward my hand? Come, let me clutch thee.
I have thee not, and yet I see thee still.
Art thou not, fatal vision, sensible
To feeling as to sight? or art thou but
A dagger of the mind, a false creation,
Proceeding from the heat-oppressed brain?
I see thee yet, in form as palpable
As this which now I draw.
Thou marshall'st me the way that I was going;
And such an instrument I was to use.
Mine eyes are made the fools o' the other senses,
Or else worth all the rest; I see thee still,
And on thy blade and dudgeon gouts of blood,
Which was not so before. There's no such thing:
It is the bloody business which informs
Thus to mine eyes. Now o'er the one halfworld
Nature seems dead, and wicked dreams abuse
The curtain'd sleep; witchcraft celebrates
Pale Hecate's offerings, and wither'd murder,
Alarum'd by his sentinel, the wolf,
Whose howl's his watch, thus with his stealthy pace.
With Tarquin's ravishing strides, towards his design
Moves like a ghost. Thou sure and firm-set earth,
Hear not my steps, which way they walk, for fear
Thy very stones prate of my whereabout,
And take the present horror from the time,
Which now suits with it. Whiles I threat, he lives:
Words to the heat of deeds too cold breath gives.
[A bell rings]
I go, and it is done; the bell invites me.
Hear it not, Duncan; for it is a knell
That summons thee to heaven or to hell.`
   205  
// juliet is one of the Shakespeare monologues that main feeds into the
// pipeline as an input element.
var juliet = `O Romeo, Romeo! wherefore art thou Romeo?
Deny thy father and refuse thy name;
Or, if thou wilt not, be but sworn my love,
And I'll no longer be a Capulet.
'Tis but thy name that is my enemy;
Thou art thyself, though not a Montague.
What's Montague? it is nor hand, nor foot,
Nor arm, nor face, nor any other part
Belonging to a man. O, be some other name!
What's in a name? that which we call a rose
By any other name would smell as sweet;
So Romeo would, were he not Romeo call'd,
Retain that dear perfection which he owes
Without that title. Romeo, doff thy name,
And for that name which is no part of thee
Take all myself.`
   222  
// helena is one of the Shakespeare monologues that main feeds into the
// pipeline as an input element.
var helena = `Lo, she is one of this confederacy!
Now I perceive they have conjoin'd all three
To fashion this false sport, in spite of me.
Injurious Hermia! most ungrateful maid!
Have you conspired, have you with these contrived
To bait me with this foul derision?
Is all the counsel that we two have shared,
The sisters' vows, the hours that we have spent,
When we have chid the hasty-footed time
For parting us,--O, is it all forgot?
All school-days' friendship, childhood innocence?
We, Hermia, like two artificial gods,
Have with our needles created both one flower,
Both on one sampler, sitting on one cushion,
Both warbling of one song, both in one key,
As if our hands, our sides, voices and minds,
Had been incorporate. So we grow together,
Like to a double cherry, seeming parted,
But yet an union in partition;
Two lovely berries moulded on one stem;
So, with two seeming bodies, but one heart;
Two of the first, like coats in heraldry,
Due but to one and crowned with one crest.
And will you rent our ancient love asunder,
To join with men in scorning your poor friend?
It is not friendly, 'tis not maidenly:
Our sex, as well as I, may chide you for it,
Though I alone do feel the injury.`