go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/server/tq/txn/spanner/lessor.go (about) 1 // Copyright 2021 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package spanner 16 17 import ( 18 "context" 19 "math" 20 "time" 21 22 "cloud.google.com/go/spanner" 23 24 "go.chromium.org/luci/common/clock" 25 "go.chromium.org/luci/common/errors" 26 "go.chromium.org/luci/common/logging" 27 "go.chromium.org/luci/common/retry/transient" 28 29 "go.chromium.org/luci/server/span" 30 "go.chromium.org/luci/server/tq/internal/lessor" 31 "go.chromium.org/luci/server/tq/internal/partition" 32 ) 33 34 // spanLessor implements lessor.Lessor on top of Cloud Spanner. 35 type spanLessor struct { 36 } 37 38 // WithLease acquires the lease and executes WithLeaseCB. 39 // The obtained lease duration may be shorter than requested. 40 // The obtained lease may be only for some parts of the desired Partition. 41 func (l *spanLessor) WithLease(ctx context.Context, sectionID string, part *partition.Partition, dur time.Duration, clbk lessor.WithLeaseCB) error { 42 expiresAt := clock.Now(ctx).Add(dur) 43 if d, ok := ctx.Deadline(); ok && expiresAt.After(d) { 44 expiresAt = d 45 } 46 47 lease, err := l.acquire(ctx, sectionID, part, expiresAt) 48 if err != nil { 49 return err 50 } 51 defer lease.remove(ctx) // failure to remove is logged & ignored. 52 53 lctx, cancel := clock.WithDeadline(ctx, lease.ExpiresAt) 54 defer cancel() 55 clbk(lctx, lease.parts) 56 return nil 57 } 58 59 func (*spanLessor) acquire(ctx context.Context, sectionID string, desired *partition.Partition, expiresAt time.Time) (*lease, error) { 60 var acquired *lease 61 deletedExpired := 0 62 63 _, err := span.ReadWriteTransaction(ctx, func(ctx context.Context) error { 64 deletedExpired = 0 // reset in case of retries. 65 all, err := loadAll(ctx, sectionID) 66 if err != nil { 67 return errors.Annotate(err, "failed to read leases").Err() 68 } 69 active, expired := activeAndExpired(ctx, all) 70 if len(expired) > 0 { 71 // Deleting >= 1 lease every time a new one is created suffices to avoid 72 // accumulating garbage above O(active leases). 73 if len(expired) > 50 { 74 expired = expired[:50] 75 } 76 remove(ctx, expired) 77 deletedExpired = len(expired) 78 } 79 parts, err := availableForLease(desired, active) 80 if err != nil { 81 return errors.Annotate(err, "failed to decode available leases").Err() 82 } 83 acquired = save(ctx, sectionID, expiresAt, parts, maxLeaseID(all)) 84 return nil 85 }) 86 if err != nil { 87 return nil, errors.Annotate(err, "failed to transact a lease").Tag(transient.Tag).Err() 88 } 89 if deletedExpired > 0 { 90 // If this is logged frequently, something is wrong either with the leasing 91 // process or the lessees are holding to lease longer than they should. 92 logging.Warningf(ctx, "deleted %d expired leases", deletedExpired) 93 } 94 return acquired, nil 95 } 96 97 type lease struct { 98 SectionID string 99 LeaseID int64 100 SerializedParts []string 101 ExpiresAt time.Time 102 103 // Set only when lease object is created in save(). 104 parts partition.SortedPartitions 105 } 106 107 func save(ctx context.Context, sectionID string, expiresAt time.Time, parts partition.SortedPartitions, max int64) *lease { 108 if len(parts) == 0 { 109 return &lease{ 110 ExpiresAt: expiresAt, 111 parts: parts, 112 } // no need to save noop lease. 113 } 114 115 l := &lease{ 116 SectionID: sectionID, 117 SerializedParts: make([]string, len(parts)), 118 ExpiresAt: expiresAt.UTC(), 119 parts: parts, 120 } 121 for i, p := range parts { 122 l.SerializedParts[i] = p.String() 123 } 124 125 // Strictly increase the leaseID until it reaches to math.MaxInt64 then 126 // go back and increase from 1 again. 127 var leaseID int64 128 switch { 129 case max < math.MaxInt64: 130 leaseID = max + 1 131 default: 132 leaseID = 1 133 } 134 135 l.LeaseID = leaseID 136 m := spanner.InsertMap("TQLeases", map[string]any{ 137 "SectionID": l.SectionID, 138 "LeaseID": leaseID, 139 "SerializedParts": l.SerializedParts, 140 "ExpiresAt": l.ExpiresAt, 141 }) 142 span.BufferWrite(ctx, m) 143 144 return l 145 } 146 147 func (l *lease) remove(ctx context.Context) { 148 if l.LeaseID == 0 { 149 return 150 } 151 152 _, err := span.ReadWriteTransaction(ctx, func(ctx context.Context) error { 153 remove(ctx, []*lease{l}) 154 return nil 155 }) 156 if err != nil { 157 // Log only. Once lease expires, it'll garbage-collected next time a new 158 // lease is acquired for the same sectionID. 159 logging.Warningf(ctx, "failed to remove lease %v", l) 160 } 161 } 162 163 func query(ctx context.Context, sectionID string) ([]*lease, error) { 164 st := spanner.NewStatement(` 165 SELECT SectionID, LeaseID, SerializedParts, ExpiresAt 166 FROM TQLeases 167 WHERE SectionID = @sectionID 168 `) 169 st.Params = map[string]any{ 170 "sectionID": sectionID, 171 } 172 173 var all []*lease 174 err := span.Query(ctx, st).Do( 175 func(row *spanner.Row) error { 176 l := &lease{} 177 if err := row.Columns(&l.SectionID, &l.LeaseID, &l.SerializedParts, &l.ExpiresAt); err != nil { 178 return err 179 } 180 all = append(all, l) 181 return nil 182 }, 183 ) 184 return all, err 185 } 186 187 func loadAll(ctx context.Context, sectionID string) ([]*lease, error) { 188 all, err := query(ctx, sectionID) 189 if err != nil { 190 return nil, errors.Annotate(err, "failed to fetch leases").Tag(transient.Tag).Err() 191 } 192 return all, nil 193 } 194 195 func activeAndExpired(ctx context.Context, all []*lease) (active, expired []*lease) { 196 // Partition active leases in the front and expired at the end of the slice. 197 i, j := 0, len(all) 198 now := clock.Now(ctx) 199 for i < j { 200 if all[i].ExpiresAt.After(now) { 201 i++ 202 continue 203 } 204 j-- 205 all[i], all[j] = all[j], all[i] 206 } 207 return all[:i], all[i:] 208 } 209 210 func maxLeaseID(all []*lease) int64 { 211 var max int64 = 0 212 for _, l := range all { 213 if l.LeaseID > max { 214 max = l.LeaseID 215 } 216 } 217 return max 218 } 219 220 func availableForLease(desired *partition.Partition, active []*lease) (partition.SortedPartitions, error) { 221 builder := partition.NewSortedPartitionsBuilder(desired) 222 // Exclude from desired all partitions under currently active leases. 223 // TODO(tandrii): constrain number of partitions per lease to avoid excessive 224 // runtime here. 225 for _, l := range active { 226 for _, s := range l.SerializedParts { 227 p, err := partition.FromString(s) 228 if err != nil { 229 return nil, err 230 } 231 builder.Exclude(p) 232 if builder.IsEmpty() { 233 break 234 } 235 } 236 } 237 return builder.Result(), nil 238 } 239 240 func remove(ctx context.Context, ls []*lease) { 241 ms := make([]*spanner.Mutation, 0, len(ls)) 242 for _, l := range ls { 243 if l.LeaseID == 0 { 244 continue 245 } 246 m := spanner.Delete("TQLeases", spanner.Key{l.SectionID, l.LeaseID}) 247 ms = append(ms, m) 248 } 249 span.BufferWrite(ctx, ms...) 250 }