github.com/apache/beam/sdks/v2@v2.48.2/go/examples/stringsplit/stringsplit.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one or more 2 // contributor license agreements. See the NOTICE file distributed with 3 // this work for additional information regarding copyright ownership. 4 // The ASF licenses this file to You under the Apache License, Version 2.0 5 // (the "License"); you may not use this file except in compliance with 6 // the License. You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 // An example of using a Splittable DoFn in the Go SDK with a portable runner. 17 // 18 // The following instructions describe how to execute this example in the 19 // Flink local runner. 20 // 21 // 1. From a command line, navigate to the top-level beam/ directory and run 22 // the Flink job server: 23 // 24 // ./gradlew :runners:flink:1.13:job-server:runShadow -Djob-host=localhost -Dflink-master=local 25 // 26 // 2. The job server is ready to receive jobs once it outputs a log like the 27 // following: `JobService started on localhost:8099`. Take note of the endpoint 28 // in that log message. 29 // 30 // 3. While the job server is running in one command line window, create a 31 // second one in the same directory and run this example with the following 32 // command, using the endpoint you noted from step 2: 33 // 34 // go run sdks/go/examples/stringsplit/stringsplit.go --runner=universal --endpoint=localhost:8099 35 // 36 // 4. Once the pipeline is complete, the job server can be closed with ctrl+C. 37 // To check the output of the pipeline, search the job server logs for the 38 // phrase "StringSplit Output". 39 package main 40 41 // beam-playground: 42 // name: StringSplit 43 // description: An example of using a Splittable DoFn in the Go SDK with a portable runner. 44 // multifile: false 45 // context_line: 61 46 // categories: 47 // - Debugging 48 // - Flatten 49 // complexity: MEDIUM 50 // tags: 51 // - pipeline 52 // - split 53 // - runner 54 55 import ( 56 "context" 57 "flag" 58 "time" 59 60 "github.com/apache/beam/sdks/v2/go/pkg/beam" 61 "github.com/apache/beam/sdks/v2/go/pkg/beam/core/sdf" 62 "github.com/apache/beam/sdks/v2/go/pkg/beam/io/rtrackers/offsetrange" 63 "github.com/apache/beam/sdks/v2/go/pkg/beam/log" 64 "github.com/apache/beam/sdks/v2/go/pkg/beam/register" 65 "github.com/apache/beam/sdks/v2/go/pkg/beam/x/beamx" 66 ) 67 68 func init() { 69 register.DoFn4x0[context.Context, *sdf.LockRTracker, string, func(string)](&StringSplitFn{}) 70 register.DoFn2x0[context.Context, string](&LogFn{}) 71 register.Emitter1[string]() 72 } 73 74 // StringSplitFn is a Splittable DoFn that splits strings into substrings of the 75 // specified size (for example, to be able to fit them in a small buffer). 76 // See ProcessElement for more details. 77 type StringSplitFn struct { 78 BufSize int64 79 } 80 81 // CreateInitialRestriction creates an offset range restriction for each element 82 // with the size of the restriction corresponding to the length of the string. 83 func (fn *StringSplitFn) CreateInitialRestriction(s string) offsetrange.Restriction { 84 rest := offsetrange.Restriction{Start: 0, End: int64(len(s))} 85 log.Debugf(context.Background(), "StringSplit CreateInitialRestriction: %v", rest) 86 return rest 87 } 88 89 // SplitRestriction performs initial splits so that each restriction is split 90 // into 5. 91 func (fn *StringSplitFn) SplitRestriction(_ string, rest offsetrange.Restriction) []offsetrange.Restriction { 92 splits := rest.EvenSplits(5) 93 log.Debugf(context.Background(), "StringSplit SplitRestrictions: %v -> %v", rest, splits) 94 return splits 95 } 96 97 // RestrictionSize returns the size as the difference between the restriction's 98 // start and end. 99 func (fn *StringSplitFn) RestrictionSize(_ string, rest offsetrange.Restriction) float64 { 100 size := rest.Size() 101 log.Debugf(context.Background(), "StringSplit RestrictionSize: %v -> %v", rest, size) 102 return size 103 } 104 105 // CreateTracker creates an offset range restriction tracker out of the offset 106 // range restriction, and wraps it a thread-safe restriction tracker. 107 func (fn *StringSplitFn) CreateTracker(rest offsetrange.Restriction) *sdf.LockRTracker { 108 return sdf.NewLockRTracker(offsetrange.NewTracker(rest)) 109 } 110 111 // ProcessElement splits a string into substrings of a specified size (set in 112 // StringSplitFn.BufSize). 113 // 114 // Note that the substring blocks are not guaranteed to line up with the 115 // restriction boundaries. ProcessElement is expected to emit any substring 116 // block that begins in its restriction, even if it extends past the end of the 117 // restriction. 118 // 119 // Example: If BufSize is 100, then a restriction of 75 to 325 should emit the 120 // following substrings: [100, 200], [200, 300], [300, 400] 121 func (fn *StringSplitFn) ProcessElement(ctx context.Context, rt *sdf.LockRTracker, elem string, emit func(string)) { 122 log.Debugf(ctx, "StringSplit ProcessElement: Tracker = %v", rt) 123 i := rt.GetRestriction().(offsetrange.Restriction).Start 124 if rem := i % fn.BufSize; rem != 0 { 125 i += fn.BufSize - rem // Skip to next multiple of BufSize. 126 } 127 strEnd := int64(len(elem)) 128 129 for rt.TryClaim(i) == true { 130 if i+fn.BufSize > strEnd { 131 emit(elem[i:]) 132 } else { 133 emit(elem[i : i+fn.BufSize]) 134 } 135 i += fn.BufSize 136 } 137 } 138 139 // LogFn is a DoFn to log our split output. 140 type LogFn struct{} 141 142 // ProcessElement logs each element it receives. 143 func (fn *LogFn) ProcessElement(ctx context.Context, in string) { 144 log.Infof(ctx, "StringSplit Output:\n%v", in) 145 } 146 147 // FinishBundle waits a bit so the job server finishes receiving logs. 148 func (fn *LogFn) FinishBundle() { 149 time.Sleep(2 * time.Second) 150 } 151 152 // Use our StringSplitFn to split Shakespeare monologues into substrings and 153 // output them. 154 func main() { 155 flag.Parse() 156 beam.Init() 157 158 ctx := context.Background() 159 160 p := beam.NewPipeline() 161 s := p.Root() 162 163 monologues := beam.Create(s, macbeth, juliet, helena) 164 split := beam.ParDo(s, &StringSplitFn{50}, monologues) 165 beam.ParDo0(s, &LogFn{}, split) 166 167 if err := beamx.Run(ctx, p); err != nil { 168 log.Fatalf(ctx, "Failed to execute job: %v", err) 169 } 170 } 171 172 var macbeth = `Is this a dagger which I see before me, 173 The handle toward my hand? Come, let me clutch thee. 174 I have thee not, and yet I see thee still. 175 Art thou not, fatal vision, sensible 176 To feeling as to sight? or art thou but 177 A dagger of the mind, a false creation, 178 Proceeding from the heat-oppressed brain? 179 I see thee yet, in form as palpable 180 As this which now I draw. 181 Thou marshall'st me the way that I was going; 182 And such an instrument I was to use. 183 Mine eyes are made the fools o' the other senses, 184 Or else worth all the rest; I see thee still, 185 And on thy blade and dudgeon gouts of blood, 186 Which was not so before. There's no such thing: 187 It is the bloody business which informs 188 Thus to mine eyes. Now o'er the one halfworld 189 Nature seems dead, and wicked dreams abuse 190 The curtain'd sleep; witchcraft celebrates 191 Pale Hecate's offerings, and wither'd murder, 192 Alarum'd by his sentinel, the wolf, 193 Whose howl's his watch, thus with his stealthy pace. 194 With Tarquin's ravishing strides, towards his design 195 Moves like a ghost. Thou sure and firm-set earth, 196 Hear not my steps, which way they walk, for fear 197 Thy very stones prate of my whereabout, 198 And take the present horror from the time, 199 Which now suits with it. Whiles I threat, he lives: 200 Words to the heat of deeds too cold breath gives. 201 [A bell rings] 202 I go, and it is done; the bell invites me. 203 Hear it not, Duncan; for it is a knell 204 That summons thee to heaven or to hell.` 205 206 var juliet = `O Romeo, Romeo! wherefore art thou Romeo? 207 Deny thy father and refuse thy name; 208 Or, if thou wilt not, be but sworn my love, 209 And I'll no longer be a Capulet. 210 'Tis but thy name that is my enemy; 211 Thou art thyself, though not a Montague. 212 What's Montague? it is nor hand, nor foot, 213 Nor arm, nor face, nor any other part 214 Belonging to a man. O, be some other name! 215 What's in a name? that which we call a rose 216 By any other name would smell as sweet; 217 So Romeo would, were he not Romeo call'd, 218 Retain that dear perfection which he owes 219 Without that title. Romeo, doff thy name, 220 And for that name which is no part of thee 221 Take all myself.` 222 223 var helena = `Lo, she is one of this confederacy! 224 Now I perceive they have conjoin'd all three 225 To fashion this false sport, in spite of me. 226 Injurious Hermia! most ungrateful maid! 227 Have you conspired, have you with these contrived 228 To bait me with this foul derision? 229 Is all the counsel that we two have shared, 230 The sisters' vows, the hours that we have spent, 231 When we have chid the hasty-footed time 232 For parting us,--O, is it all forgot? 233 All school-days' friendship, childhood innocence? 234 We, Hermia, like two artificial gods, 235 Have with our needles created both one flower, 236 Both on one sampler, sitting on one cushion, 237 Both warbling of one song, both in one key, 238 As if our hands, our sides, voices and minds, 239 Had been incorporate. So we grow together, 240 Like to a double cherry, seeming parted, 241 But yet an union in partition; 242 Two lovely berries moulded on one stem; 243 So, with two seeming bodies, but one heart; 244 Two of the first, like coats in heraldry, 245 Due but to one and crowned with one crest. 246 And will you rent our ancient love asunder, 247 To join with men in scorning your poor friend? 248 It is not friendly, 'tis not maidenly: 249 Our sex, as well as I, may chide you for it, 250 Though I alone do feel the injury.`