// Copyright 2014 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package server

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"sort"
	"strconv"
	"strings"
	"time"

	"github.com/cockroachdb/apd"
	"github.com/cockroachdb/cockroach/pkg/base"
	"github.com/cockroachdb/cockroach/pkg/config/zonepb"
	"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
	"github.com/cockroachdb/cockroach/pkg/keys"
	"github.com/cockroachdb/cockroach/pkg/kv"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/rpc"
	"github.com/cockroachdb/cockroach/pkg/security"
	"github.com/cockroachdb/cockroach/pkg/server/debug"
	"github.com/cockroachdb/cockroach/pkg/server/serverpb"
	"github.com/cockroachdb/cockroach/pkg/server/telemetry"
	"github.com/cockroachdb/cockroach/pkg/settings"
	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
	"github.com/cockroachdb/cockroach/pkg/sql"
	"github.com/cockroachdb/cockroach/pkg/sql/parser"
	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
	"github.com/cockroachdb/cockroach/pkg/ts/catalog"
	"github.com/cockroachdb/cockroach/pkg/util/contextutil"
	"github.com/cockroachdb/cockroach/pkg/util/envutil"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/mon"
	"github.com/cockroachdb/cockroach/pkg/util/protoutil"
	"github.com/cockroachdb/cockroach/pkg/util/uuid"
	"github.com/cockroachdb/errors"
	gwruntime "github.com/grpc-ecosystem/grpc-gateway/runtime"
	gwutil "github.com/grpc-ecosystem/grpc-gateway/utilities"
	"google.golang.org/grpc"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"
)

const (
	// adminPrefix is the prefix for RESTful endpoints used to provide an
	// administrative interface to the cockroach cluster.
	adminPrefix = "/_admin/v1/"

	// defaultAPIEventLimit is the default maximum number of events returned by any
	// endpoints returning events.
	defaultAPIEventLimit = 1000
)

// nonTableDescriptorRangeCount returns the number of empty ranges for table
// descriptors that aren't actually tables. These cause special cases in range
// count computations because we split along them anyway, but they're not SQL
// tables.
//
// The count is derived from the length of the ID list below, so adding an ID
// to that list automatically adjusts the result.
func nonTableDescriptorRangeCount() int64 {
	// NB: explicitly reference them for IDE usage.
	return int64(len([]int{
		keys.MetaRangesID,
		keys.SystemRangesID,
		keys.TimeseriesRangesID,
		keys.LivenessRangesID,
		keys.PublicSchemaID,
	}))
}

// errAdminAPIError is the standard error returned for all internal failures
// of the admin API (it carries gRPC code Internal). The message is
// deliberately generic; the underlying cause is only written to the logs by
// the serverError* helpers.
var errAdminAPIError = status.Errorf(codes.Internal, "An internal server error "+
	"has occurred. Please check your CockroachDB logs for more details.")

// An adminServer provides a RESTful HTTP API to administration of
// the cockroach cluster.
type adminServer struct {
	// server is the Server whose subsystems (SQL executor, DB, recorder, ...)
	// the admin endpoints call into.
	server *Server
	// memMonitor is the memory monitor created for admin operations in
	// newAdminServer.
	memMonitor mon.BytesMonitor
}

// noteworthyAdminMemoryUsageBytes is the minimum size tracked by the
// admin SQL pool before the pool starts explicitly logging overall
// usage growth in the log. It is initialized through
// envutil.EnvOrDefaultInt64 with the COCKROACH_NOTEWORTHY_ADMIN_MEMORY_USAGE
// key and a default of 100*1024.
var noteworthyAdminMemoryUsageBytes = envutil.EnvOrDefaultInt64("COCKROACH_NOTEWORTHY_ADMIN_MEMORY_USAGE", 100*1024)

// newAdminServer allocates and returns a new REST server for
// administrative APIs.
102 func newAdminServer(s *Server) *adminServer { 103 server := &adminServer{server: s} 104 // TODO(knz): We do not limit memory usage by admin operations 105 // yet. Is this wise? 106 server.memMonitor = mon.MakeUnlimitedMonitor( 107 context.Background(), 108 "admin", 109 mon.MemoryResource, 110 nil, 111 nil, 112 noteworthyAdminMemoryUsageBytes, 113 s.ClusterSettings(), 114 ) 115 return server 116 } 117 118 // RegisterService registers the GRPC service. 119 func (s *adminServer) RegisterService(g *grpc.Server) { 120 serverpb.RegisterAdminServer(g, s) 121 } 122 123 // RegisterGateway starts the gateway (i.e. reverse proxy) that proxies HTTP requests 124 // to the appropriate gRPC endpoints. 125 func (s *adminServer) RegisterGateway( 126 ctx context.Context, mux *gwruntime.ServeMux, conn *grpc.ClientConn, 127 ) error { 128 // Register the /_admin/v1/stmtbundle endpoint, which serves statement support 129 // bundles as files. 130 stmtBundlePattern := gwruntime.MustPattern(gwruntime.NewPattern( 131 1, /* version */ 132 []int{ 133 int(gwutil.OpLitPush), 0, int(gwutil.OpLitPush), 1, int(gwutil.OpLitPush), 2, 134 int(gwutil.OpPush), 0, int(gwutil.OpConcatN), 1, int(gwutil.OpCapture), 3}, 135 []string{"_admin", "v1", "stmtbundle", "id"}, 136 "", /* verb */ 137 )) 138 139 mux.Handle("GET", stmtBundlePattern, func( 140 w http.ResponseWriter, req *http.Request, pathParams map[string]string, 141 ) { 142 idStr, ok := pathParams["id"] 143 if !ok { 144 http.Error(w, "missing id", http.StatusBadRequest) 145 return 146 } 147 id, err := strconv.ParseInt(idStr, 10, 64) 148 if err != nil { 149 http.Error(w, "invalid id", http.StatusBadRequest) 150 return 151 } 152 s.getStatementBundle(ctx, id, w) 153 }) 154 155 // Register the endpoints defined in the proto. 156 return serverpb.RegisterAdminHandler(ctx, mux, conn) 157 } 158 159 // serverError logs the provided error and returns an error that should be returned by 160 // the RPC endpoint method. 
161 func (s *adminServer) serverError(err error) error { 162 log.ErrorfDepth(context.TODO(), 1, "%s", err) 163 return errAdminAPIError 164 } 165 166 // serverErrorf logs the provided error and returns an error that should be returned by 167 // the RPC endpoint method. 168 func (s *adminServer) serverErrorf(format string, args ...interface{}) error { 169 log.ErrorfDepth(context.TODO(), 1, format, args...) 170 return errAdminAPIError 171 } 172 173 // serverErrors logs the provided errors and returns an error that should be returned by 174 // the RPC endpoint method. 175 func (s *adminServer) serverErrors(errors []error) error { 176 log.ErrorfDepth(context.TODO(), 1, "%v", errors) 177 return errAdminAPIError 178 } 179 180 // isNotFoundError returns true if err is a table/database not found error. 181 func (s *adminServer) isNotFoundError(err error) bool { 182 // TODO(cdo): Replace this crude suffix-matching with something more structured once we have 183 // more structured errors. 184 return err != nil && strings.HasSuffix(err.Error(), "does not exist") 185 } 186 187 // AllMetricMetadata returns all metrics' metadata. 188 func (s *adminServer) AllMetricMetadata( 189 ctx context.Context, req *serverpb.MetricMetadataRequest, 190 ) (*serverpb.MetricMetadataResponse, error) { 191 192 resp := &serverpb.MetricMetadataResponse{ 193 Metadata: s.server.recorder.GetMetricsMetadata(), 194 } 195 196 return resp, nil 197 } 198 199 // ChartCatalog returns a catalog of Admin UI charts useful for debugging. 
200 func (s *adminServer) ChartCatalog( 201 ctx context.Context, req *serverpb.ChartCatalogRequest, 202 ) (*serverpb.ChartCatalogResponse, error) { 203 metricsMetadata := s.server.recorder.GetMetricsMetadata() 204 205 chartCatalog, err := catalog.GenerateCatalog(metricsMetadata) 206 207 if err != nil { 208 return nil, err 209 } 210 211 resp := &serverpb.ChartCatalogResponse{ 212 Catalog: chartCatalog, 213 } 214 215 return resp, nil 216 } 217 218 // Databases is an endpoint that returns a list of databases. 219 func (s *adminServer) Databases( 220 ctx context.Context, req *serverpb.DatabasesRequest, 221 ) (*serverpb.DatabasesResponse, error) { 222 ctx = s.server.AnnotateCtx(ctx) 223 224 sessionUser, err := userFromContext(ctx) 225 if err != nil { 226 return nil, err 227 } 228 229 rows, err := s.server.sqlServer.internalExecutor.QueryEx( 230 ctx, "admin-show-dbs", nil, /* txn */ 231 sqlbase.InternalExecutorSessionDataOverride{User: sessionUser}, 232 "SHOW DATABASES", 233 ) 234 if err != nil { 235 return nil, s.serverError(err) 236 } 237 238 var resp serverpb.DatabasesResponse 239 for _, row := range rows { 240 dbDatum, ok := tree.AsDString(row[0]) 241 if !ok { 242 return nil, s.serverErrorf("type assertion failed on db name: %T", row[0]) 243 } 244 dbName := string(dbDatum) 245 resp.Databases = append(resp.Databases, dbName) 246 } 247 248 return &resp, nil 249 } 250 251 // DatabaseDetails is an endpoint that returns grants and a list of table names 252 // for the specified database. 253 func (s *adminServer) DatabaseDetails( 254 ctx context.Context, req *serverpb.DatabaseDetailsRequest, 255 ) (*serverpb.DatabaseDetailsResponse, error) { 256 ctx = s.server.AnnotateCtx(ctx) 257 userName, err := userFromContext(ctx) 258 if err != nil { 259 return nil, err 260 } 261 262 escDBName := tree.NameStringP(&req.Database) 263 // Placeholders don't work with SHOW statements, so we need to manually 264 // escape the database name. 
265 // 266 // TODO(cdo): Use placeholders when they're supported by SHOW. 267 268 // Marshal grants. 269 rows, cols, err := s.server.sqlServer.internalExecutor.QueryWithCols( 270 ctx, "admin-show-grants", nil, /* txn */ 271 sqlbase.InternalExecutorSessionDataOverride{User: userName}, 272 fmt.Sprintf("SHOW GRANTS ON DATABASE %s", escDBName), 273 ) 274 if s.isNotFoundError(err) { 275 return nil, status.Errorf(codes.NotFound, "%s", err) 276 } 277 if err != nil { 278 return nil, s.serverError(err) 279 } 280 var resp serverpb.DatabaseDetailsResponse 281 { 282 const ( 283 schemaCol = "schema_name" 284 userCol = "grantee" 285 privilegesCol = "privilege_type" 286 ) 287 288 scanner := makeResultScanner(cols) 289 for _, row := range rows { 290 var schemaName string 291 if err := scanner.Scan(row, schemaCol, &schemaName); err != nil { 292 return nil, err 293 } 294 if schemaName != tree.PublicSchema { 295 // We only want to list real tables. 296 continue 297 } 298 299 // Marshal grant, splitting comma-separated privileges into a proper slice. 300 var grant serverpb.DatabaseDetailsResponse_Grant 301 var privileges string 302 if err := scanner.Scan(row, userCol, &grant.User); err != nil { 303 return nil, err 304 } 305 if err := scanner.Scan(row, privilegesCol, &privileges); err != nil { 306 return nil, err 307 } 308 grant.Privileges = strings.Split(privileges, ",") 309 resp.Grants = append(resp.Grants, grant) 310 } 311 } 312 313 // Marshal table names. 314 rows, cols, err = s.server.sqlServer.internalExecutor.QueryWithCols( 315 ctx, "admin-show-tables", nil, /* txn */ 316 sqlbase.InternalExecutorSessionDataOverride{User: userName}, 317 fmt.Sprintf("SHOW TABLES FROM %s", escDBName), 318 ) 319 if s.isNotFoundError(err) { 320 return nil, status.Errorf(codes.NotFound, "%s", err) 321 } 322 if err != nil { 323 return nil, s.serverError(err) 324 } 325 326 // Marshal table names. 
327 { 328 scanner := makeResultScanner(cols) 329 if a, e := len(cols), 3; a != e { 330 return nil, s.serverErrorf("show tables columns mismatch: %d != expected %d", a, e) 331 } 332 for _, row := range rows { 333 var schemaName, tableName string 334 if err := scanner.Scan(row, "schema_name", &schemaName); err != nil { 335 return nil, err 336 } 337 if schemaName != "public" { 338 continue 339 } 340 if err := scanner.Scan(row, "table_name", &tableName); err != nil { 341 return nil, err 342 } 343 resp.TableNames = append(resp.TableNames, tableName) 344 } 345 } 346 347 // Query the descriptor ID and zone configuration for this database. 348 { 349 path, err := s.queryDescriptorIDPath(ctx, userName, []string{req.Database}) 350 if err != nil { 351 return nil, s.serverError(err) 352 } 353 resp.DescriptorID = int64(path[1]) 354 355 id, zone, zoneExists, err := s.queryZonePath(ctx, userName, path) 356 if err != nil { 357 return nil, s.serverError(err) 358 } 359 360 if !zoneExists { 361 zone = s.server.cfg.DefaultZoneConfig 362 } 363 resp.ZoneConfig = zone 364 365 switch id { 366 case path[1]: 367 resp.ZoneConfigLevel = serverpb.ZoneConfigurationLevel_DATABASE 368 default: 369 resp.ZoneConfigLevel = serverpb.ZoneConfigurationLevel_CLUSTER 370 } 371 } 372 373 return &resp, nil 374 } 375 376 // TableDetails is an endpoint that returns columns, indices, and other 377 // relevant details for the specified table. 378 func (s *adminServer) TableDetails( 379 ctx context.Context, req *serverpb.TableDetailsRequest, 380 ) (*serverpb.TableDetailsResponse, error) { 381 ctx = s.server.AnnotateCtx(ctx) 382 userName, err := userFromContext(ctx) 383 if err != nil { 384 return nil, err 385 } 386 387 escDBName := tree.NameStringP(&req.Database) 388 // TODO(cdo): Use real placeholders for the table and database names when we've extended our SQL 389 // grammar to allow that. 
390 escTableName := tree.NameStringP(&req.Table) 391 escQualTable := fmt.Sprintf("%s.%s", escDBName, escTableName) 392 393 var resp serverpb.TableDetailsResponse 394 395 // Marshal SHOW COLUMNS result. 396 rows, cols, err := s.server.sqlServer.internalExecutor.QueryWithCols( 397 ctx, "admin-show-columns", 398 nil, /* txn */ 399 sqlbase.InternalExecutorSessionDataOverride{User: userName}, 400 fmt.Sprintf("SHOW COLUMNS FROM %s", escQualTable), 401 ) 402 if s.isNotFoundError(err) { 403 return nil, status.Errorf(codes.NotFound, "%s", err) 404 } 405 if err != nil { 406 return nil, s.serverError(err) 407 } 408 // TODO(cdo): protobuf v3's default behavior for fields with zero values (e.g. empty strings) 409 // is to suppress them. So, if protobuf field "foo" is an empty string, "foo" won't show 410 // up in the marshaled JSON. I feel that this is counterintuitive, and this should be fixed 411 // for our API. 412 { 413 const ( 414 colCol = "column_name" 415 typeCol = "data_type" 416 nullCol = "is_nullable" 417 defaultCol = "column_default" 418 genCol = "generation_expression" 419 hiddenCol = "is_hidden" 420 ) 421 scanner := makeResultScanner(cols) 422 for _, row := range rows { 423 var col serverpb.TableDetailsResponse_Column 424 if err := scanner.Scan(row, colCol, &col.Name); err != nil { 425 return nil, err 426 } 427 if err := scanner.Scan(row, typeCol, &col.Type); err != nil { 428 return nil, err 429 } 430 if err := scanner.Scan(row, nullCol, &col.Nullable); err != nil { 431 return nil, err 432 } 433 if err := scanner.Scan(row, hiddenCol, &col.Hidden); err != nil { 434 return nil, err 435 } 436 isDefaultNull, err := scanner.IsNull(row, defaultCol) 437 if err != nil { 438 return nil, err 439 } 440 if !isDefaultNull { 441 if err := scanner.Scan(row, defaultCol, &col.DefaultValue); err != nil { 442 return nil, err 443 } 444 } 445 isGenNull, err := scanner.IsNull(row, genCol) 446 if err != nil { 447 return nil, err 448 } 449 if !isGenNull { 450 if err := scanner.Scan(row, 
genCol, &col.GenerationExpression); err != nil { 451 return nil, err 452 } 453 } 454 resp.Columns = append(resp.Columns, col) 455 } 456 } 457 458 // Marshal SHOW INDEX result. 459 rows, cols, err = s.server.sqlServer.internalExecutor.QueryWithCols( 460 ctx, "admin-showindex", nil, /* txn */ 461 sqlbase.InternalExecutorSessionDataOverride{User: userName}, 462 fmt.Sprintf("SHOW INDEX FROM %s", escQualTable), 463 ) 464 if s.isNotFoundError(err) { 465 return nil, status.Errorf(codes.NotFound, "%s", err) 466 } 467 if err != nil { 468 return nil, s.serverError(err) 469 } 470 { 471 const ( 472 nameCol = "index_name" 473 nonUniqueCol = "non_unique" 474 seqCol = "seq_in_index" 475 columnCol = "column_name" 476 directionCol = "direction" 477 storingCol = "storing" 478 implicitCol = "implicit" 479 ) 480 scanner := makeResultScanner(cols) 481 for _, row := range rows { 482 // Marshal grant, splitting comma-separated privileges into a proper slice. 483 var index serverpb.TableDetailsResponse_Index 484 if err := scanner.Scan(row, nameCol, &index.Name); err != nil { 485 return nil, err 486 } 487 var nonUnique bool 488 if err := scanner.Scan(row, nonUniqueCol, &nonUnique); err != nil { 489 return nil, err 490 } 491 index.Unique = !nonUnique 492 if err := scanner.Scan(row, seqCol, &index.Seq); err != nil { 493 return nil, err 494 } 495 if err := scanner.Scan(row, columnCol, &index.Column); err != nil { 496 return nil, err 497 } 498 if err := scanner.Scan(row, directionCol, &index.Direction); err != nil { 499 return nil, err 500 } 501 if err := scanner.Scan(row, storingCol, &index.Storing); err != nil { 502 return nil, err 503 } 504 if err := scanner.Scan(row, implicitCol, &index.Implicit); err != nil { 505 return nil, err 506 } 507 resp.Indexes = append(resp.Indexes, index) 508 } 509 } 510 511 // Marshal SHOW GRANTS result. 
512 rows, cols, err = s.server.sqlServer.internalExecutor.QueryWithCols( 513 ctx, "admin-show-grants", nil, /* txn */ 514 sqlbase.InternalExecutorSessionDataOverride{User: userName}, 515 fmt.Sprintf("SHOW GRANTS ON TABLE %s", escQualTable), 516 ) 517 if s.isNotFoundError(err) { 518 return nil, status.Errorf(codes.NotFound, "%s", err) 519 } 520 if err != nil { 521 return nil, s.serverError(err) 522 } 523 { 524 const ( 525 userCol = "grantee" 526 privilegesCol = "privilege_type" 527 ) 528 scanner := makeResultScanner(cols) 529 for _, row := range rows { 530 // Marshal grant, splitting comma-separated privileges into a proper slice. 531 var grant serverpb.TableDetailsResponse_Grant 532 var privileges string 533 if err := scanner.Scan(row, userCol, &grant.User); err != nil { 534 return nil, err 535 } 536 if err := scanner.Scan(row, privilegesCol, &privileges); err != nil { 537 return nil, err 538 } 539 grant.Privileges = strings.Split(privileges, ",") 540 resp.Grants = append(resp.Grants, grant) 541 } 542 } 543 544 // Marshal SHOW CREATE result. 545 rows, cols, err = s.server.sqlServer.internalExecutor.QueryWithCols( 546 ctx, "admin-show-create", nil, /* txn */ 547 sqlbase.InternalExecutorSessionDataOverride{User: userName}, 548 fmt.Sprintf("SHOW CREATE %s", escQualTable), 549 ) 550 if s.isNotFoundError(err) { 551 return nil, status.Errorf(codes.NotFound, "%s", err) 552 } 553 if err != nil { 554 return nil, s.serverError(err) 555 } 556 { 557 const createCol = "create_statement" 558 if len(rows) != 1 { 559 return nil, s.serverErrorf("create response not available.") 560 } 561 562 scanner := makeResultScanner(cols) 563 var createStmt string 564 if err := scanner.Scan(rows[0], createCol, &createStmt); err != nil { 565 return nil, err 566 } 567 568 resp.CreateTableStatement = createStmt 569 } 570 571 var tableID sqlbase.ID 572 // Query the descriptor ID and zone configuration for this table. 
573 { 574 path, err := s.queryDescriptorIDPath(ctx, userName, []string{req.Database, req.Table}) 575 if err != nil { 576 return nil, s.serverError(err) 577 } 578 tableID = path[2] 579 resp.DescriptorID = int64(tableID) 580 581 id, zone, zoneExists, err := s.queryZonePath(ctx, userName, path) 582 if err != nil { 583 return nil, s.serverError(err) 584 } 585 586 if !zoneExists { 587 zone = s.server.cfg.DefaultZoneConfig 588 } 589 resp.ZoneConfig = zone 590 591 switch id { 592 case path[1]: 593 resp.ZoneConfigLevel = serverpb.ZoneConfigurationLevel_DATABASE 594 case path[2]: 595 resp.ZoneConfigLevel = serverpb.ZoneConfigurationLevel_TABLE 596 default: 597 resp.ZoneConfigLevel = serverpb.ZoneConfigurationLevel_CLUSTER 598 } 599 } 600 601 // Get the number of ranges in the table. We get the key span for the table 602 // data. Then, we count the number of ranges that make up that key span. 603 { 604 tableSpan := generateTableSpan(tableID) 605 tableRSpan := roachpb.RSpan{} 606 var err error 607 tableRSpan.Key, err = keys.Addr(tableSpan.Key) 608 if err != nil { 609 return nil, s.serverError(err) 610 } 611 tableRSpan.EndKey, err = keys.Addr(tableSpan.EndKey) 612 if err != nil { 613 return nil, s.serverError(err) 614 } 615 rangeCount, err := s.server.distSender.CountRanges(ctx, tableRSpan) 616 if err != nil { 617 return nil, s.serverError(err) 618 } 619 resp.RangeCount = rangeCount 620 } 621 622 return &resp, nil 623 } 624 625 // generateTableSpan generates a table's key span. 626 // 627 // NOTE: this doesn't make sense for interleaved (children) table. As of 628 // 03/2018, callers around here use it anyway. 629 func generateTableSpan(tableID sqlbase.ID) roachpb.Span { 630 tableStartKey := keys.TODOSQLCodec.TablePrefix(uint32(tableID)) 631 tableEndKey := tableStartKey.PrefixEnd() 632 return roachpb.Span{Key: tableStartKey, EndKey: tableEndKey} 633 } 634 635 // TableStats is an endpoint that returns disk usage and replication statistics 636 // for the specified table. 
637 func (s *adminServer) TableStats( 638 ctx context.Context, req *serverpb.TableStatsRequest, 639 ) (*serverpb.TableStatsResponse, error) { 640 // TODO(someone): perform authorization based on the requesting user's 641 // SELECT privilege over the requested table. 642 userName, err := s.requireAdminUser(ctx) 643 if err != nil { 644 return nil, err 645 } 646 647 // Get table span. 648 path, err := s.queryDescriptorIDPath( 649 ctx, userName, []string{req.Database, req.Table}, 650 ) 651 if err != nil { 652 return nil, s.serverError(err) 653 } 654 tableID := path[2] 655 tableSpan := generateTableSpan(tableID) 656 657 return s.statsForSpan(ctx, tableSpan) 658 } 659 660 // NonTableStats is an endpoint that returns disk usage and replication 661 // statistics for non-table parts of the system. 662 func (s *adminServer) NonTableStats( 663 ctx context.Context, req *serverpb.NonTableStatsRequest, 664 ) (*serverpb.NonTableStatsResponse, error) { 665 if _, err := s.requireAdminUser(ctx); err != nil { 666 return nil, err 667 } 668 669 timeSeriesStats, err := s.statsForSpan(ctx, roachpb.Span{ 670 Key: keys.TimeseriesPrefix, 671 EndKey: keys.TimeseriesPrefix.PrefixEnd(), 672 }) 673 if err != nil { 674 return nil, err 675 } 676 response := serverpb.NonTableStatsResponse{ 677 TimeSeriesStats: timeSeriesStats, 678 } 679 680 spansForInternalUse := []roachpb.Span{ 681 { 682 Key: keys.LocalMax, 683 EndKey: keys.TimeseriesPrefix, 684 }, 685 { 686 Key: keys.TimeseriesKeyMax, 687 EndKey: keys.TableDataMin, 688 }, 689 } 690 for _, span := range spansForInternalUse { 691 nonTableStats, err := s.statsForSpan(ctx, span) 692 if err != nil { 693 return nil, err 694 } 695 if response.InternalUseStats == nil { 696 response.InternalUseStats = nonTableStats 697 } else { 698 response.InternalUseStats.Add(nonTableStats) 699 } 700 } 701 702 // There are four empty ranges for table descriptors 17, 18, 19, and 22 that 703 // aren't actually tables (a.k.a. 
MetaRangesID, SystemRangesID, 704 // TimeseriesRangesID, and LivenessRangesID in pkg/keys). 705 // No data is ever really written to them since they don't have actual 706 // tables. Some backend work could probably be done to eliminate these empty 707 // ranges, but it may be more trouble than it's worth. In the meantime, 708 // sweeping them under the general-purpose "Internal use" label in 709 // the "Non-Table" section of the Databases page. 710 response.InternalUseStats.RangeCount += nonTableDescriptorRangeCount() 711 712 return &response, nil 713 } 714 715 func (s *adminServer) statsForSpan( 716 ctx context.Context, span roachpb.Span, 717 ) (*serverpb.TableStatsResponse, error) { 718 startKey, err := keys.Addr(span.Key) 719 if err != nil { 720 return nil, s.serverError(err) 721 } 722 endKey, err := keys.Addr(span.EndKey) 723 if err != nil { 724 return nil, s.serverError(err) 725 } 726 727 // Get current range descriptors for table. This is done by scanning over 728 // meta2 keys for the range. A special case occurs if we wish to include 729 // the meta1 key range itself, in which case we'll get KeyMin back and that 730 // cannot be scanned (due to range-local addressing confusion). This is 731 // handled appropriately by adjusting the bounds to grab the descriptors 732 // for all ranges (including range1, which is not only gossiped but also 733 // persisted in meta1). 734 startMetaKey := keys.RangeMetaKey(startKey) 735 if bytes.Equal(startMetaKey, roachpb.RKeyMin) { 736 // This is the special case described above. The following key instructs 737 // the code below to scan all of the addressing, i.e. grab all of the 738 // descriptors including that for r1. 739 startMetaKey = keys.RangeMetaKey(keys.MustAddr(keys.Meta2Prefix)) 740 } 741 742 rangeDescKVs, err := s.server.db.Scan(ctx, startMetaKey, keys.RangeMetaKey(endKey), 0) 743 if err != nil { 744 return nil, s.serverError(err) 745 } 746 747 // This map will store the nodes we need to fan out to. 
748 nodeIDs := make(map[roachpb.NodeID]struct{}) 749 for _, kv := range rangeDescKVs { 750 var rng roachpb.RangeDescriptor 751 if err := kv.Value.GetProto(&rng); err != nil { 752 return nil, s.serverError(err) 753 } 754 for _, repl := range rng.Replicas().All() { 755 nodeIDs[repl.NodeID] = struct{}{} 756 } 757 } 758 759 // Construct TableStatsResponse by sending an RPC to every node involved. 760 tableStatResponse := serverpb.TableStatsResponse{ 761 NodeCount: int64(len(nodeIDs)), 762 // TODO(mrtracy): The "RangeCount" returned by TableStats is more 763 // accurate than the "RangeCount" returned by TableDetails, because this 764 // method always consistently queries the meta2 key range for the table; 765 // in contrast, TableDetails uses a method on the DistSender, which 766 // queries using a range metadata cache and thus may return stale data 767 // for tables that are rapidly splitting. However, one potential 768 // *advantage* of using the DistSender is that it will populate the 769 // DistSender's range metadata cache in the case where meta2 information 770 // for this table is not already present; the query used by TableStats 771 // does not populate the DistSender cache. We should consider plumbing 772 // TableStats' meta2 query through the DistSender so that it will share 773 // the advantage of populating the cache (without the disadvantage of 774 // potentially returning stale data). 775 // See Github #5435 for some discussion. 776 RangeCount: int64(len(rangeDescKVs)), 777 } 778 type nodeResponse struct { 779 nodeID roachpb.NodeID 780 resp *serverpb.SpanStatsResponse 781 err error 782 } 783 784 // Send a SpanStats query to each node. 
785 responses := make(chan nodeResponse, len(nodeIDs)) 786 for nodeID := range nodeIDs { 787 nodeID := nodeID // avoid data race 788 if err := s.server.stopper.RunAsyncTask( 789 ctx, "server.adminServer: requesting remote stats", 790 func(ctx context.Context) { 791 // Set a generous timeout on the context for each individual query. 792 var spanResponse *serverpb.SpanStatsResponse 793 err := contextutil.RunWithTimeout(ctx, "request remote stats", 5*base.NetworkTimeout, 794 func(ctx context.Context) error { 795 client, err := s.server.status.dialNode(ctx, nodeID) 796 if err == nil { 797 req := serverpb.SpanStatsRequest{ 798 StartKey: startKey, 799 EndKey: endKey, 800 NodeID: nodeID.String(), 801 } 802 spanResponse, err = client.SpanStats(ctx, &req) 803 } 804 return err 805 }) 806 807 // Channel is buffered, can always write. 808 responses <- nodeResponse{ 809 nodeID: nodeID, 810 resp: spanResponse, 811 err: err, 812 } 813 }); err != nil { 814 return nil, err 815 } 816 } 817 for remainingResponses := len(nodeIDs); remainingResponses > 0; remainingResponses-- { 818 select { 819 case resp := <-responses: 820 // For nodes which returned an error, note that the node's data 821 // is missing. For successful calls, aggregate statistics. 822 if resp.err != nil { 823 tableStatResponse.MissingNodes = append( 824 tableStatResponse.MissingNodes, 825 serverpb.TableStatsResponse_MissingNode{ 826 NodeID: resp.nodeID.String(), 827 ErrorMessage: resp.err.Error(), 828 }, 829 ) 830 } else { 831 tableStatResponse.Stats.Add(resp.resp.TotalStats) 832 tableStatResponse.ReplicaCount += int64(resp.resp.RangeCount) 833 tableStatResponse.ApproximateDiskBytes += resp.resp.ApproximateDiskBytes 834 } 835 case <-ctx.Done(): 836 // Caller gave up, stop doing work. 837 return nil, ctx.Err() 838 } 839 } 840 841 return &tableStatResponse, nil 842 } 843 844 // Users returns a list of users, stripped of any passwords. 
845 func (s *adminServer) Users( 846 ctx context.Context, req *serverpb.UsersRequest, 847 ) (*serverpb.UsersResponse, error) { 848 ctx = s.server.AnnotateCtx(ctx) 849 userName, err := userFromContext(ctx) 850 if err != nil { 851 return nil, err 852 } 853 query := `SELECT username FROM system.users WHERE "isRole" = false` 854 rows, err := s.server.sqlServer.internalExecutor.QueryEx( 855 ctx, "admin-users", nil, /* txn */ 856 sqlbase.InternalExecutorSessionDataOverride{User: userName}, 857 query, 858 ) 859 if err != nil { 860 return nil, s.serverError(err) 861 } 862 863 var resp serverpb.UsersResponse 864 for _, row := range rows { 865 resp.Users = append(resp.Users, serverpb.UsersResponse_User{Username: string(tree.MustBeDString(row[0]))}) 866 } 867 return &resp, nil 868 } 869 870 // Events is an endpoint that returns the latest event log entries, with the following 871 // optional URL parameters: 872 // 873 // type=STRING returns events with this type (e.g. "create_table") 874 // targetID=INT returns events for that have this targetID 875 func (s *adminServer) Events( 876 ctx context.Context, req *serverpb.EventsRequest, 877 ) (*serverpb.EventsResponse, error) { 878 ctx = s.server.AnnotateCtx(ctx) 879 880 userName, isAdmin, err := s.getUserAndRole(ctx) 881 if err != nil { 882 return nil, err 883 } 884 redactEvents := false 885 if isAdmin { 886 // We obey the redacted bit only if the user is admin. 887 redactEvents = !req.UnredactedEvents 888 } 889 890 limit := req.Limit 891 if limit == 0 { 892 limit = defaultAPIEventLimit 893 } 894 895 // Execute the query. 896 q := makeSQLQuery() 897 q.Append(`SELECT timestamp, "eventType", "targetID", "reportingID", info, "uniqueID" `) 898 q.Append("FROM system.eventlog ") 899 q.Append("WHERE true ") // This simplifies the WHERE clause logic below. 
900 if len(req.Type) > 0 { 901 q.Append(`AND "eventType" = $ `, req.Type) 902 } 903 if req.TargetId > 0 { 904 q.Append(`AND "targetID" = $ `, req.TargetId) 905 } 906 q.Append("ORDER BY timestamp DESC ") 907 if limit > 0 { 908 q.Append("LIMIT $", limit) 909 } 910 if len(q.Errors()) > 0 { 911 return nil, s.serverErrors(q.Errors()) 912 } 913 rows, cols, err := s.server.sqlServer.internalExecutor.QueryWithCols( 914 ctx, "admin-events", nil, /* txn */ 915 sqlbase.InternalExecutorSessionDataOverride{User: userName}, 916 q.String(), q.QueryArguments()...) 917 if err != nil { 918 return nil, s.serverError(err) 919 } 920 921 // Marshal response. 922 var resp serverpb.EventsResponse 923 scanner := makeResultScanner(cols) 924 for _, row := range rows { 925 var event serverpb.EventsResponse_Event 926 var ts time.Time 927 if err := scanner.ScanIndex(row, 0, &ts); err != nil { 928 return nil, err 929 } 930 event.Timestamp = ts 931 if err := scanner.ScanIndex(row, 1, &event.EventType); err != nil { 932 return nil, err 933 } 934 if err := scanner.ScanIndex(row, 2, &event.TargetID); err != nil { 935 return nil, err 936 } 937 if err := scanner.ScanIndex(row, 3, &event.ReportingID); err != nil { 938 return nil, err 939 } 940 if err := scanner.ScanIndex(row, 4, &event.Info); err != nil { 941 return nil, err 942 } 943 if event.EventType == string(sql.EventLogSetClusterSetting) { 944 if redactEvents { 945 event.Info = redactSettingsChange(event.Info) 946 } 947 } 948 if err := scanner.ScanIndex(row, 5, &event.UniqueID); err != nil { 949 return nil, err 950 } 951 952 resp.Events = append(resp.Events, event) 953 } 954 return &resp, nil 955 } 956 957 // make a best-effort attempt at redacting the setting value. 
958 func redactSettingsChange(info string) string { 959 var s sql.EventLogSetClusterSettingDetail 960 if err := json.Unmarshal([]byte(info), &s); err != nil { 961 return "" 962 } 963 s.Value = "<hidden>" 964 ret, err := json.Marshal(s) 965 if err != nil { 966 return "" 967 } 968 return string(ret) 969 } 970 971 // RangeLog is an endpoint that returns the latest range log entries. 972 func (s *adminServer) RangeLog( 973 ctx context.Context, req *serverpb.RangeLogRequest, 974 ) (*serverpb.RangeLogResponse, error) { 975 ctx = s.server.AnnotateCtx(ctx) 976 977 // Range keys, even when pretty-printed, contain PII. 978 userName, err := s.requireAdminUser(ctx) 979 if err != nil { 980 return nil, err 981 } 982 983 limit := req.Limit 984 if limit == 0 { 985 limit = defaultAPIEventLimit 986 } 987 988 includeRawKeys := debug.GatewayRemoteAllowed(ctx, s.server.ClusterSettings()) 989 990 // Execute the query. 991 q := makeSQLQuery() 992 q.Append(`SELECT timestamp, "rangeID", "storeID", "eventType", "otherRangeID", info `) 993 q.Append("FROM system.rangelog ") 994 if req.RangeId > 0 { 995 rangeID := tree.NewDInt(tree.DInt(req.RangeId)) 996 q.Append(`WHERE "rangeID" = $ OR "otherRangeID" = $`, rangeID, rangeID) 997 } 998 if limit > 0 { 999 q.Append("ORDER BY timestamp desc ") 1000 q.Append("LIMIT $", tree.NewDInt(tree.DInt(limit))) 1001 } 1002 if len(q.Errors()) > 0 { 1003 return nil, s.serverErrors(q.Errors()) 1004 } 1005 rows, cols, err := s.server.sqlServer.internalExecutor.QueryWithCols( 1006 ctx, "admin-range-log", nil, /* txn */ 1007 sqlbase.InternalExecutorSessionDataOverride{User: userName}, 1008 q.String(), q.QueryArguments()..., 1009 ) 1010 if err != nil { 1011 return nil, s.serverError(err) 1012 } 1013 1014 // Marshal response. 
1015 var resp serverpb.RangeLogResponse 1016 if len(cols) != 6 { 1017 return nil, errors.Errorf("incorrect number of columns in response, expected 6, got %d", len(cols)) 1018 } 1019 scanner := makeResultScanner(cols) 1020 for _, row := range rows { 1021 var event kvserverpb.RangeLogEvent 1022 var ts time.Time 1023 if err := scanner.ScanIndex(row, 0, &ts); err != nil { 1024 return nil, errors.Wrapf(err, "timestamp didn't parse correctly: %s", row[0].String()) 1025 } 1026 event.Timestamp = ts 1027 var rangeID int64 1028 if err := scanner.ScanIndex(row, 1, &rangeID); err != nil { 1029 return nil, errors.Wrapf(err, "RangeID didn't parse correctly: %s", row[1].String()) 1030 } 1031 event.RangeID = roachpb.RangeID(rangeID) 1032 var storeID int64 1033 if err := scanner.ScanIndex(row, 2, &storeID); err != nil { 1034 return nil, errors.Wrapf(err, "StoreID didn't parse correctly: %s", row[2].String()) 1035 } 1036 event.StoreID = roachpb.StoreID(int32(storeID)) 1037 var eventTypeString string 1038 if err := scanner.ScanIndex(row, 3, &eventTypeString); err != nil { 1039 return nil, errors.Wrapf(err, "EventType didn't parse correctly: %s", row[3].String()) 1040 } 1041 if eventType, ok := kvserverpb.RangeLogEventType_value[eventTypeString]; ok { 1042 event.EventType = kvserverpb.RangeLogEventType(eventType) 1043 } else { 1044 return nil, errors.Errorf("EventType didn't parse correctly: %s", eventTypeString) 1045 } 1046 1047 var otherRangeID int64 1048 if row[4].String() != "NULL" { 1049 if err := scanner.ScanIndex(row, 4, &otherRangeID); err != nil { 1050 return nil, errors.Wrapf(err, "OtherRangeID didn't parse correctly: %s", row[4].String()) 1051 } 1052 event.OtherRangeID = roachpb.RangeID(otherRangeID) 1053 } 1054 1055 var prettyInfo serverpb.RangeLogResponse_PrettyInfo 1056 if row[5].String() != "NULL" { 1057 var info string 1058 if err := scanner.ScanIndex(row, 5, &info); err != nil { 1059 return nil, errors.Wrapf(err, "info didn't parse correctly: %s", row[5].String()) 
1060 } 1061 if err := json.Unmarshal([]byte(info), &event.Info); err != nil { 1062 return nil, errors.Wrapf(err, "info didn't parse correctly: %s", info) 1063 } 1064 if event.Info.NewDesc != nil { 1065 if !includeRawKeys { 1066 event.Info.NewDesc.StartKey = nil 1067 event.Info.NewDesc.EndKey = nil 1068 } 1069 prettyInfo.NewDesc = event.Info.NewDesc.String() 1070 } 1071 if event.Info.UpdatedDesc != nil { 1072 if !includeRawKeys { 1073 event.Info.UpdatedDesc.StartKey = nil 1074 event.Info.UpdatedDesc.EndKey = nil 1075 } 1076 prettyInfo.UpdatedDesc = event.Info.UpdatedDesc.String() 1077 } 1078 if event.Info.AddedReplica != nil { 1079 prettyInfo.AddedReplica = event.Info.AddedReplica.String() 1080 } 1081 if event.Info.RemovedReplica != nil { 1082 prettyInfo.RemovedReplica = event.Info.RemovedReplica.String() 1083 } 1084 prettyInfo.Reason = string(event.Info.Reason) 1085 prettyInfo.Details = event.Info.Details 1086 } 1087 1088 resp.Events = append(resp.Events, serverpb.RangeLogResponse_Event{ 1089 Event: event, 1090 PrettyInfo: prettyInfo, 1091 }) 1092 } 1093 return &resp, nil 1094 } 1095 1096 // getUIData returns the values and timestamps for the given UI keys. Keys 1097 // that are not found will not be returned. 1098 func (s *adminServer) getUIData( 1099 ctx context.Context, userName string, keys []string, 1100 ) (*serverpb.GetUIDataResponse, error) { 1101 if len(keys) == 0 { 1102 return &serverpb.GetUIDataResponse{}, nil 1103 } 1104 1105 // Query database. 
1106 query := makeSQLQuery() 1107 query.Append(`SELECT key, value, "lastUpdated" FROM system.ui WHERE key IN (`) 1108 for i, key := range keys { 1109 if i != 0 { 1110 query.Append(",") 1111 } 1112 query.Append("$", tree.NewDString(makeUIKey(userName, key))) 1113 } 1114 query.Append(");") 1115 if err := query.Errors(); err != nil { 1116 return nil, s.serverErrorf("error constructing query: %v", err) 1117 } 1118 rows, err := s.server.sqlServer.internalExecutor.QueryEx( 1119 ctx, "admin-getUIData", nil, /* txn */ 1120 sqlbase.InternalExecutorSessionDataOverride{User: security.RootUser}, 1121 query.String(), query.QueryArguments()..., 1122 ) 1123 if err != nil { 1124 return nil, s.serverError(err) 1125 } 1126 1127 // Marshal results. 1128 resp := serverpb.GetUIDataResponse{KeyValues: make(map[string]serverpb.GetUIDataResponse_Value)} 1129 for _, row := range rows { 1130 dKey, ok := tree.AsDString(row[0]) 1131 if !ok { 1132 return nil, s.serverErrorf("unexpected type for UI key: %T", row[0]) 1133 } 1134 _, key := splitUIKey(string(dKey)) 1135 dKey = tree.DString(key) 1136 1137 dValue, ok := row[1].(*tree.DBytes) 1138 if !ok { 1139 return nil, s.serverErrorf("unexpected type for UI value: %T", row[1]) 1140 } 1141 dLastUpdated, ok := row[2].(*tree.DTimestamp) 1142 if !ok { 1143 return nil, s.serverErrorf("unexpected type for UI lastUpdated: %T", row[2]) 1144 } 1145 1146 resp.KeyValues[string(dKey)] = serverpb.GetUIDataResponse_Value{ 1147 Value: []byte(*dValue), 1148 LastUpdated: dLastUpdated.Time, 1149 } 1150 } 1151 return &resp, nil 1152 } 1153 1154 // makeUIKey combines username and key to form a lookup key in 1155 // system.ui. 1156 // The username is combined to ensure that different users 1157 // can use different customizations. 1158 func makeUIKey(username, key string) string { 1159 return username + "$" + key 1160 } 1161 1162 // splitUIKey is the inverse of makeUIKey. 1163 // The caller must ensure that the value was produced by makeUIKey. 
1164 func splitUIKey(combined string) (string, string) { 1165 pair := strings.SplitN(combined, "$", 2) 1166 return pair[0], pair[1] 1167 } 1168 1169 // SetUIData is an endpoint that stores the given key/value pairs in the 1170 // system.ui table. See GetUIData for more details on semantics. 1171 func (s *adminServer) SetUIData( 1172 ctx context.Context, req *serverpb.SetUIDataRequest, 1173 ) (*serverpb.SetUIDataResponse, error) { 1174 ctx = s.server.AnnotateCtx(ctx) 1175 1176 userName, err := userFromContext(ctx) 1177 if err != nil { 1178 return nil, err 1179 } 1180 1181 if len(req.KeyValues) == 0 { 1182 return nil, status.Errorf(codes.InvalidArgument, "KeyValues cannot be empty") 1183 } 1184 1185 for key, val := range req.KeyValues { 1186 // Do an upsert of the key. We update each key in a separate transaction to 1187 // avoid long-running transactions and possible deadlocks. 1188 query := `UPSERT INTO system.ui (key, value, "lastUpdated") VALUES ($1, $2, now())` 1189 rowsAffected, err := s.server.sqlServer.internalExecutor.ExecEx( 1190 ctx, "admin-set-ui-data", nil, /* txn */ 1191 sqlbase.InternalExecutorSessionDataOverride{ 1192 User: security.RootUser, 1193 }, 1194 query, makeUIKey(userName, key), val) 1195 if err != nil { 1196 return nil, s.serverError(err) 1197 } 1198 if rowsAffected != 1 { 1199 return nil, s.serverErrorf("rows affected %d != expected %d", rowsAffected, 1) 1200 } 1201 } 1202 1203 return &serverpb.SetUIDataResponse{}, nil 1204 } 1205 1206 // GetUIData returns data associated with the given keys, which was stored 1207 // earlier through SetUIData. 1208 // 1209 // The stored values are meant to be opaque to the server. In the rare case that 1210 // the server code needs to call this method, it should only read from keys that 1211 // have the prefix `serverUIDataKeyPrefix`. 
1212 func (s *adminServer) GetUIData( 1213 ctx context.Context, req *serverpb.GetUIDataRequest, 1214 ) (*serverpb.GetUIDataResponse, error) { 1215 ctx = s.server.AnnotateCtx(ctx) 1216 1217 userName, err := userFromContext(ctx) 1218 if err != nil { 1219 return nil, err 1220 } 1221 1222 if len(req.Keys) == 0 { 1223 return nil, status.Errorf(codes.InvalidArgument, "keys cannot be empty") 1224 } 1225 1226 resp, err := s.getUIData(ctx, userName, req.Keys) 1227 if err != nil { 1228 return nil, s.serverError(err) 1229 } 1230 1231 return resp, nil 1232 } 1233 1234 // Settings returns settings associated with the given keys. 1235 func (s *adminServer) Settings( 1236 ctx context.Context, req *serverpb.SettingsRequest, 1237 ) (*serverpb.SettingsResponse, error) { 1238 keys := req.Keys 1239 if len(keys) == 0 { 1240 keys = settings.Keys() 1241 } 1242 1243 sessionUser, err := userFromContext(ctx) 1244 if err != nil { 1245 return nil, err 1246 } 1247 1248 isAdmin, err := s.hasAdminRole(ctx, sessionUser) 1249 if err != nil { 1250 return nil, err 1251 } 1252 1253 var lookupPurpose settings.LookupPurpose 1254 if isAdmin { 1255 // Root accesses can customize the purpose. 1256 // This is used by the UI to see all values (local access) 1257 // and `cockroach zip` to redact the values (telemetry). 1258 lookupPurpose = settings.LookupForReporting 1259 if req.UnredactedValues { 1260 lookupPurpose = settings.LookupForLocalAccess 1261 } 1262 } else { 1263 // Non-root access cannot see the values in any case. 1264 lookupPurpose = settings.LookupForReporting 1265 } 1266 1267 resp := serverpb.SettingsResponse{KeyValues: make(map[string]serverpb.SettingsResponse_Value)} 1268 for _, k := range keys { 1269 v, ok := settings.Lookup(k, lookupPurpose) 1270 if !ok { 1271 continue 1272 } 1273 resp.KeyValues[k] = serverpb.SettingsResponse_Value{ 1274 Type: v.Typ(), 1275 // Note: v.String() redacts the values if the purpose is not "LocalAccess". 
1276 Value: v.String(&s.server.st.SV), 1277 Description: v.Description(), 1278 Public: v.Visibility() == settings.Public, 1279 } 1280 } 1281 1282 return &resp, nil 1283 } 1284 1285 // Cluster returns cluster metadata. 1286 func (s *adminServer) Cluster( 1287 _ context.Context, req *serverpb.ClusterRequest, 1288 ) (*serverpb.ClusterResponse, error) { 1289 clusterID := s.server.ClusterID() 1290 if clusterID == (uuid.UUID{}) { 1291 return nil, status.Errorf(codes.Unavailable, "cluster ID not yet available") 1292 } 1293 1294 // Check if enterprise features are enabled. We currently test for the 1295 // feature "BACKUP", although enterprise licenses do not yet distinguish 1296 // between different features. 1297 organization := sql.ClusterOrganization.Get(&s.server.st.SV) 1298 enterpriseEnabled := base.CheckEnterpriseEnabled(s.server.st, clusterID, organization, "BACKUP") == nil 1299 1300 return &serverpb.ClusterResponse{ 1301 ClusterID: clusterID.String(), 1302 ReportingEnabled: log.DiagnosticsReportingEnabled.Get(&s.server.st.SV), 1303 EnterpriseEnabled: enterpriseEnabled, 1304 }, nil 1305 } 1306 1307 // Health returns liveness for the node target of the request. 1308 // 1309 // See the docstring for HealthRequest for more details about 1310 // what this function precisely reports. 1311 // 1312 // Note: Health is non-privileged and non-authenticated and thus 1313 // must not report privileged information. 1314 func (s *adminServer) Health( 1315 ctx context.Context, req *serverpb.HealthRequest, 1316 ) (*serverpb.HealthResponse, error) { 1317 telemetry.Inc(telemetryHealthCheck) 1318 1319 resp := &serverpb.HealthResponse{} 1320 // If Ready is not set, the client doesn't want to know whether this node is 1321 // ready to receive client traffic. 
1322 if !req.Ready { 1323 return resp, nil 1324 } 1325 1326 if err := s.checkReadinessForHealthCheck(); err != nil { 1327 return nil, err 1328 } 1329 return resp, nil 1330 } 1331 1332 func (s *adminServer) checkReadinessForHealthCheck() error { 1333 serveMode := s.server.grpc.mode.get() 1334 switch serveMode { 1335 case modeInitializing: 1336 return status.Error(codes.Unavailable, "node is waiting for cluster initialization") 1337 case modeDraining: 1338 // grpc.mode is set to modeDraining when the Drain(DrainMode_CLIENT) has 1339 // been called (client connections are to be drained). 1340 return status.Errorf(codes.Unavailable, "node is shutting down") 1341 case modeOperational: 1342 break 1343 default: 1344 return s.serverError(errors.Newf("unknown mode: %v", serveMode)) 1345 } 1346 1347 // TODO(knz): update this code when progress is made on 1348 // https://github.com/cockroachdb/cockroach/issues/45123 1349 l, err := s.server.nodeLiveness.GetLiveness(s.server.NodeID()) 1350 if err != nil { 1351 return s.serverError(err) 1352 } 1353 if !l.IsLive(s.server.clock.Now().GoTime()) { 1354 return status.Errorf(codes.Unavailable, "node is not healthy") 1355 } 1356 if l.Draining { 1357 // l.Draining indicates that the node is draining leases. 1358 // This is set when Drain(DrainMode_LEASES) is called. 1359 // It's possible that l.Draining is set without 1360 // grpc.mode being modeDraining, if a RPC client 1361 // has requested DrainMode_LEASES but not DrainMode_CLIENT. 1362 return status.Errorf(codes.Unavailable, "node is shutting down") 1363 } 1364 1365 return nil 1366 } 1367 1368 // getLivenessStatusMap generates a map from NodeID to LivenessStatus for all 1369 // nodes known to gossip. Nodes that haven't pinged their liveness record for 1370 // more than server.time_until_store_dead are considered dead. 
1371 // 1372 // To include all nodes (including ones not in the gossip network), callers 1373 // should consider calling (statusServer).NodesWithLiveness() instead where 1374 // possible. 1375 // 1376 // getLivenessStatusMap() includes removed nodes (dead + decommissioned). 1377 func getLivenessStatusMap( 1378 nl *kvserver.NodeLiveness, now time.Time, st *cluster.Settings, 1379 ) map[roachpb.NodeID]kvserverpb.NodeLivenessStatus { 1380 livenesses := nl.GetLivenesses() 1381 threshold := kvserver.TimeUntilStoreDead.Get(&st.SV) 1382 1383 statusMap := make(map[roachpb.NodeID]kvserverpb.NodeLivenessStatus, len(livenesses)) 1384 for _, liveness := range livenesses { 1385 status := kvserver.LivenessStatus(liveness, now, threshold) 1386 statusMap[liveness.NodeID] = status 1387 } 1388 return statusMap 1389 } 1390 1391 // Liveness returns the liveness state of all nodes on the cluster 1392 // known to gossip. To reach all nodes in the cluster, consider 1393 // using (statusServer).NodesWithLiveness instead. 
1394 func (s *adminServer) Liveness( 1395 context.Context, *serverpb.LivenessRequest, 1396 ) (*serverpb.LivenessResponse, error) { 1397 clock := s.server.clock 1398 statusMap := getLivenessStatusMap( 1399 s.server.nodeLiveness, clock.Now().GoTime(), s.server.st) 1400 livenesses := s.server.nodeLiveness.GetLivenesses() 1401 return &serverpb.LivenessResponse{ 1402 Livenesses: livenesses, 1403 Statuses: statusMap, 1404 }, nil 1405 } 1406 1407 func (s *adminServer) Jobs( 1408 ctx context.Context, req *serverpb.JobsRequest, 1409 ) (*serverpb.JobsResponse, error) { 1410 ctx = s.server.AnnotateCtx(ctx) 1411 1412 userName, err := userFromContext(ctx) 1413 if err != nil { 1414 return nil, err 1415 } 1416 1417 q := makeSQLQuery() 1418 q.Append(` 1419 SELECT job_id, job_type, description, statement, user_name, descriptor_ids, status, 1420 running_status, created, started, finished, modified, 1421 fraction_completed, high_water_timestamp, error 1422 FROM crdb_internal.jobs 1423 WHERE true 1424 `) 1425 if req.Status != "" { 1426 q.Append(" AND status = $", req.Status) 1427 } 1428 if req.Type != jobspb.TypeUnspecified { 1429 q.Append(" AND job_type = $", req.Type.String()) 1430 } else { 1431 // Don't show auto stats jobs in the overview page. 
1432 q.Append(" AND (job_type != $ OR job_type IS NULL)", jobspb.TypeAutoCreateStats.String()) 1433 } 1434 q.Append("ORDER BY created DESC") 1435 if req.Limit > 0 { 1436 q.Append(" LIMIT $", tree.DInt(req.Limit)) 1437 } 1438 rows, cols, err := s.server.sqlServer.internalExecutor.QueryWithCols( 1439 ctx, "admin-jobs", nil, /* txn */ 1440 sqlbase.InternalExecutorSessionDataOverride{User: userName}, 1441 q.String(), q.QueryArguments()..., 1442 ) 1443 if err != nil { 1444 return nil, s.serverError(err) 1445 } 1446 1447 scanner := makeResultScanner(cols) 1448 resp := serverpb.JobsResponse{ 1449 Jobs: make([]serverpb.JobsResponse_Job, len(rows)), 1450 } 1451 for i, row := range rows { 1452 job := &resp.Jobs[i] 1453 var fractionCompletedOrNil *float32 1454 var highwaterOrNil *apd.Decimal 1455 var runningStatusOrNil *string 1456 if err := scanner.ScanAll( 1457 row, 1458 &job.ID, 1459 &job.Type, 1460 &job.Description, 1461 &job.Statement, 1462 &job.Username, 1463 &job.DescriptorIDs, 1464 &job.Status, 1465 &runningStatusOrNil, 1466 &job.Created, 1467 &job.Started, 1468 &job.Finished, 1469 &job.Modified, 1470 &fractionCompletedOrNil, 1471 &highwaterOrNil, 1472 &job.Error, 1473 ); err != nil { 1474 return nil, s.serverError(err) 1475 } 1476 if highwaterOrNil != nil { 1477 highwaterTimestamp, err := tree.DecimalToHLC(highwaterOrNil) 1478 if err != nil { 1479 return nil, s.serverError(errors.Wrap(err, "highwater timestamp had unexpected format")) 1480 } 1481 goTime := highwaterTimestamp.GoTime() 1482 job.HighwaterTimestamp = &goTime 1483 job.HighwaterDecimal = highwaterOrNil.String() 1484 } 1485 if fractionCompletedOrNil != nil { 1486 job.FractionCompleted = *fractionCompletedOrNil 1487 } 1488 if runningStatusOrNil != nil { 1489 job.RunningStatus = *runningStatusOrNil 1490 } 1491 } 1492 1493 return &resp, nil 1494 } 1495 1496 func (s *adminServer) Locations( 1497 ctx context.Context, req *serverpb.LocationsRequest, 1498 ) (*serverpb.LocationsResponse, error) { 1499 ctx = 
s.server.AnnotateCtx(ctx) 1500 1501 userName, err := userFromContext(ctx) 1502 if err != nil { 1503 return nil, err 1504 } 1505 1506 q := makeSQLQuery() 1507 q.Append(`SELECT "localityKey", "localityValue", latitude, longitude FROM system.locations`) 1508 rows, cols, err := s.server.sqlServer.internalExecutor.QueryWithCols( 1509 ctx, "admin-locations", nil, /* txn */ 1510 sqlbase.InternalExecutorSessionDataOverride{User: userName}, 1511 q.String(), 1512 ) 1513 if err != nil { 1514 return nil, s.serverError(err) 1515 } 1516 1517 scanner := makeResultScanner(cols) 1518 resp := serverpb.LocationsResponse{ 1519 Locations: make([]serverpb.LocationsResponse_Location, len(rows)), 1520 } 1521 for i, row := range rows { 1522 loc := &resp.Locations[i] 1523 lat, lon := new(apd.Decimal), new(apd.Decimal) 1524 if err := scanner.ScanAll( 1525 row, &loc.LocalityKey, &loc.LocalityValue, lat, lon); err != nil { 1526 return nil, s.serverError(err) 1527 } 1528 if loc.Latitude, err = lat.Float64(); err != nil { 1529 return nil, s.serverError(err) 1530 } 1531 if loc.Longitude, err = lon.Float64(); err != nil { 1532 return nil, s.serverError(err) 1533 } 1534 } 1535 1536 return &resp, nil 1537 } 1538 1539 // QueryPlan returns a JSON representation of a distsql physical query 1540 // plan. 1541 func (s *adminServer) QueryPlan( 1542 ctx context.Context, req *serverpb.QueryPlanRequest, 1543 ) (*serverpb.QueryPlanResponse, error) { 1544 ctx = s.server.AnnotateCtx(ctx) 1545 1546 userName, err := userFromContext(ctx) 1547 if err != nil { 1548 return nil, err 1549 } 1550 1551 // As long as there's only one query provided it's safe to construct the 1552 // explain query. 
1553 stmts, err := parser.Parse(req.Query) 1554 if err != nil { 1555 return nil, s.serverError(err) 1556 } 1557 if len(stmts) > 1 { 1558 return nil, s.serverErrorf("more than one query provided") 1559 } 1560 1561 explain := fmt.Sprintf( 1562 "SELECT json FROM [EXPLAIN (DISTSQL) %s]", 1563 strings.Trim(req.Query, ";")) 1564 rows, err := s.server.sqlServer.internalExecutor.QueryEx( 1565 ctx, "admin-query-plan", nil, /* txn */ 1566 sqlbase.InternalExecutorSessionDataOverride{User: userName}, 1567 explain, 1568 ) 1569 if err != nil { 1570 return nil, s.serverError(err) 1571 } 1572 1573 row := rows[0] 1574 dbDatum, ok := tree.AsDString(row[0]) 1575 if !ok { 1576 return nil, s.serverErrorf("type assertion failed on json: %T", row) 1577 } 1578 1579 return &serverpb.QueryPlanResponse{ 1580 DistSQLPhysicalQueryPlan: string(dbDatum), 1581 }, nil 1582 } 1583 1584 // getStatementBundle retrieves the statement bundle with the given id and 1585 // writes it out as an attachment. 1586 func (s *adminServer) getStatementBundle(ctx context.Context, id int64, w http.ResponseWriter) { 1587 sessionUser, err := userFromContext(ctx) 1588 if err != nil { 1589 http.Error(w, err.Error(), http.StatusInternalServerError) 1590 return 1591 } 1592 row, err := s.server.sqlServer.internalExecutor.QueryRowEx( 1593 ctx, "admin-stmt-bundle", nil, /* txn */ 1594 sqlbase.InternalExecutorSessionDataOverride{User: sessionUser}, 1595 "SELECT bundle_chunks FROM system.statement_diagnostics WHERE id=$1 AND bundle_chunks IS NOT NULL", 1596 id, 1597 ) 1598 if err != nil { 1599 http.Error(w, err.Error(), http.StatusInternalServerError) 1600 return 1601 } 1602 if row == nil { 1603 http.Error(w, http.StatusText(http.StatusNotFound), http.StatusNotFound) 1604 return 1605 } 1606 // Put together the entire bundle. Ideally we would stream it in chunks, 1607 // but it's hard to return errors once we start. 
1608 var bundle bytes.Buffer 1609 chunkIDs := row[0].(*tree.DArray).Array 1610 for _, chunkID := range chunkIDs { 1611 chunkRow, err := s.server.sqlServer.internalExecutor.QueryRowEx( 1612 ctx, "admin-stmt-bundle", nil, /* txn */ 1613 sqlbase.InternalExecutorSessionDataOverride{User: sessionUser}, 1614 "SELECT data FROM system.statement_bundle_chunks WHERE id=$1", 1615 chunkID, 1616 ) 1617 if err != nil { 1618 http.Error(w, err.Error(), http.StatusInternalServerError) 1619 return 1620 } 1621 if row == nil { 1622 http.Error(w, http.StatusText(http.StatusNotFound), http.StatusNotFound) 1623 return 1624 } 1625 data := chunkRow[0].(*tree.DBytes) 1626 bundle.WriteString(string(*data)) 1627 } 1628 1629 w.Header().Set( 1630 "Content-Disposition", 1631 fmt.Sprintf("attachment; filename=stmt-bundle-%d.zip", id), 1632 ) 1633 1634 _, _ = io.Copy(w, &bundle) 1635 } 1636 1637 // DecommissionStatus returns the DecommissionStatus for all or the given nodes. 1638 func (s *adminServer) DecommissionStatus( 1639 ctx context.Context, req *serverpb.DecommissionStatusRequest, 1640 ) (*serverpb.DecommissionStatusResponse, error) { 1641 // Get the number of replicas on each node. We *may* not need all of them, 1642 // but that would be more complicated than seems worth it right now. 1643 ns, err := s.server.status.Nodes(ctx, &serverpb.NodesRequest{}) 1644 if err != nil { 1645 return nil, errors.Wrap(err, "loading node statuses") 1646 } 1647 1648 nodeIDs := req.NodeIDs 1649 // If no nodeIDs given, use all nodes. 1650 if len(nodeIDs) == 0 { 1651 for _, status := range ns.Nodes { 1652 nodeIDs = append(nodeIDs, status.Desc.NodeID) 1653 } 1654 } 1655 1656 // Compute the replica counts for the target nodes only. This map doubles as 1657 // a lookup table to check whether we care about a given node. 
1658 var replicaCounts map[roachpb.NodeID]int64 1659 if err := s.server.db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error { 1660 const pageSize = 10000 1661 replicaCounts = make(map[roachpb.NodeID]int64) 1662 for _, nodeID := range nodeIDs { 1663 replicaCounts[nodeID] = 0 1664 } 1665 return txn.Iterate(ctx, keys.MetaMin, keys.MetaMax, pageSize, 1666 func(rows []kv.KeyValue) error { 1667 rangeDesc := roachpb.RangeDescriptor{} 1668 for _, row := range rows { 1669 if err := row.ValueProto(&rangeDesc); err != nil { 1670 return errors.Wrapf(err, "%s: unable to unmarshal range descriptor", row.Key) 1671 } 1672 for _, r := range rangeDesc.Replicas().All() { 1673 if _, ok := replicaCounts[r.NodeID]; ok { 1674 replicaCounts[r.NodeID]++ 1675 } 1676 } 1677 } 1678 return nil 1679 }) 1680 }); err != nil { 1681 return nil, err 1682 } 1683 1684 var res serverpb.DecommissionStatusResponse 1685 1686 for nodeID := range replicaCounts { 1687 l, err := s.server.nodeLiveness.GetLiveness(nodeID) 1688 if err != nil { 1689 return nil, errors.Wrapf(err, "unable to get liveness for %d", nodeID) 1690 } 1691 nodeResp := serverpb.DecommissionStatusResponse_Status{ 1692 NodeID: l.NodeID, 1693 ReplicaCount: replicaCounts[l.NodeID], 1694 Decommissioning: l.Decommissioning, 1695 Draining: l.Draining, 1696 } 1697 if l.IsLive(s.server.clock.Now().GoTime()) { 1698 nodeResp.IsLive = true 1699 } 1700 1701 res.Status = append(res.Status, nodeResp) 1702 } 1703 1704 sort.Slice(res.Status, func(i, j int) bool { 1705 return res.Status[i].NodeID < res.Status[j].NodeID 1706 }) 1707 1708 return &res, nil 1709 } 1710 1711 // Decommission sets the decommission flag to the specified value on the specified node(s). 1712 func (s *adminServer) Decommission( 1713 ctx context.Context, req *serverpb.DecommissionRequest, 1714 ) (*serverpb.DecommissionStatusResponse, error) { 1715 nodeIDs := req.NodeIDs 1716 if nodeIDs == nil { 1717 // If no NodeIDs are specified, decommission the current node. 
This is 1718 // used by `quit --decommission`. 1719 // TODO(knz): This behavior is deprecated in 20.1. Remove in 20.2. 1720 nodeIDs = []roachpb.NodeID{s.server.NodeID()} 1721 } 1722 1723 // Mark the target nodes as decommissioning. They'll find out as they 1724 // heartbeat their liveness. 1725 if err := s.server.Decommission(ctx, req.Decommissioning, nodeIDs); err != nil { 1726 return nil, err 1727 } 1728 return s.DecommissionStatus(ctx, &serverpb.DecommissionStatusRequest{NodeIDs: nodeIDs}) 1729 } 1730 1731 // DataDistribution returns a count of replicas on each node for each table. 1732 func (s *adminServer) DataDistribution( 1733 ctx context.Context, req *serverpb.DataDistributionRequest, 1734 ) (*serverpb.DataDistributionResponse, error) { 1735 if _, err := s.requireAdminUser(ctx); err != nil { 1736 return nil, err 1737 } 1738 1739 resp := &serverpb.DataDistributionResponse{ 1740 DatabaseInfo: make(map[string]serverpb.DataDistributionResponse_DatabaseInfo), 1741 ZoneConfigs: make(map[string]serverpb.DataDistributionResponse_ZoneConfig), 1742 } 1743 1744 userName, err := userFromContext(ctx) 1745 if err != nil { 1746 return nil, err 1747 } 1748 1749 // Get ids and names for databases and tables. 1750 // Set up this structure in the response. 1751 1752 // This relies on crdb_internal.tables returning data even for newly added tables 1753 // and deleted tables (as opposed to e.g. information_schema) because we are interested 1754 // in the data for all ranges, not just ranges for visible tables. 1755 tablesQuery := `SELECT name, table_id, database_name, drop_time FROM "".crdb_internal.tables WHERE schema_name = 'public'` 1756 rows1, err := s.server.sqlServer.internalExecutor.QueryEx( 1757 ctx, "admin-replica-matrix", nil, /* txn */ 1758 sqlbase.InternalExecutorSessionDataOverride{User: userName}, 1759 tablesQuery, 1760 ) 1761 if err != nil { 1762 return nil, s.serverError(err) 1763 } 1764 1765 // Used later when we're scanning Meta2 and only have IDs, not names. 
1766 tableInfosByTableID := map[uint32]serverpb.DataDistributionResponse_TableInfo{} 1767 1768 for _, row := range rows1 { 1769 tableName := (*string)(row[0].(*tree.DString)) 1770 tableID := uint32(tree.MustBeDInt(row[1])) 1771 dbName := (*string)(row[2].(*tree.DString)) 1772 1773 // Look at whether it was dropped. 1774 var droppedAtTime *time.Time 1775 droppedAtDatum, ok := row[3].(*tree.DTimestamp) 1776 if ok { 1777 droppedAtTime = &droppedAtDatum.Time 1778 } 1779 1780 // Insert database if it doesn't exist. 1781 dbInfo, ok := resp.DatabaseInfo[*dbName] 1782 if !ok { 1783 dbInfo = serverpb.DataDistributionResponse_DatabaseInfo{ 1784 TableInfo: make(map[string]serverpb.DataDistributionResponse_TableInfo), 1785 } 1786 resp.DatabaseInfo[*dbName] = dbInfo 1787 } 1788 1789 // Get zone config for table. 1790 zcID := int64(0) 1791 1792 if droppedAtTime == nil { 1793 // TODO(vilterp): figure out a way to get zone configs for tables that are dropped 1794 zoneConfigQuery := fmt.Sprintf( 1795 `SELECT zone_id FROM [SHOW ZONE CONFIGURATION FOR TABLE %s.%s]`, 1796 (*tree.Name)(dbName), (*tree.Name)(tableName), 1797 ) 1798 rows, err := s.server.sqlServer.internalExecutor.QueryEx( 1799 ctx, "admin-replica-matrix", nil, /* txn */ 1800 sqlbase.InternalExecutorSessionDataOverride{User: userName}, 1801 zoneConfigQuery, 1802 ) 1803 if err != nil { 1804 return nil, s.serverError(err) 1805 } 1806 1807 if len(rows) != 1 { 1808 return nil, s.serverError(fmt.Errorf( 1809 "could not get zone config for table %s; %d rows returned", *tableName, len(rows), 1810 )) 1811 } 1812 zcRow := rows[0] 1813 zcID = int64(tree.MustBeDInt(zcRow[0])) 1814 } 1815 1816 // Insert table. 1817 tableInfo := serverpb.DataDistributionResponse_TableInfo{ 1818 ReplicaCountByNodeId: make(map[roachpb.NodeID]int64), 1819 ZoneConfigId: zcID, 1820 DroppedAt: droppedAtTime, 1821 } 1822 dbInfo.TableInfo[*tableName] = tableInfo 1823 tableInfosByTableID[tableID] = tableInfo 1824 } 1825 1826 // Get replica counts. 
1827 if err := s.server.db.Txn(ctx, func(txnCtx context.Context, txn *kv.Txn) error { 1828 acct := s.memMonitor.MakeBoundAccount() 1829 defer acct.Close(txnCtx) 1830 1831 kvs, err := sql.ScanMetaKVs(ctx, txn, roachpb.Span{ 1832 Key: keys.UserTableDataMin, 1833 EndKey: keys.MaxKey, 1834 }) 1835 if err != nil { 1836 return err 1837 } 1838 1839 // Group replicas by table and node, accumulate counts. 1840 var rangeDesc roachpb.RangeDescriptor 1841 for _, kv := range kvs { 1842 if err := acct.Grow(txnCtx, int64(len(kv.Key)+len(kv.Value.RawBytes))); err != nil { 1843 return err 1844 } 1845 if err := kv.ValueProto(&rangeDesc); err != nil { 1846 return err 1847 } 1848 1849 _, tableID, err := keys.TODOSQLCodec.DecodeTablePrefix(rangeDesc.StartKey.AsRawKey()) 1850 if err != nil { 1851 return err 1852 } 1853 1854 for _, replicaDesc := range rangeDesc.Replicas().All() { 1855 tableInfo, ok := tableInfosByTableID[tableID] 1856 if !ok { 1857 // This is a database, skip. 1858 continue 1859 } 1860 tableInfo.ReplicaCountByNodeId[replicaDesc.NodeID]++ 1861 } 1862 } 1863 return nil 1864 }); err != nil { 1865 return nil, s.serverError(err) 1866 } 1867 1868 // Get zone configs. 1869 // TODO(vilterp): this can be done in parallel with getting table/db names and replica counts. 
1870 zoneConfigsQuery := ` 1871 SELECT target, raw_config_sql, raw_config_protobuf 1872 FROM crdb_internal.zones 1873 WHERE target IS NOT NULL 1874 ` 1875 rows2, err := s.server.sqlServer.internalExecutor.QueryEx( 1876 ctx, "admin-replica-matrix", nil, /* txn */ 1877 sqlbase.InternalExecutorSessionDataOverride{User: userName}, 1878 zoneConfigsQuery) 1879 if err != nil { 1880 return nil, s.serverError(err) 1881 } 1882 1883 for _, row := range rows2 { 1884 target := string(tree.MustBeDString(row[0])) 1885 zcSQL := tree.MustBeDString(row[1]) 1886 zcBytes := tree.MustBeDBytes(row[2]) 1887 var zcProto zonepb.ZoneConfig 1888 if err := protoutil.Unmarshal([]byte(zcBytes), &zcProto); err != nil { 1889 return nil, s.serverError(err) 1890 } 1891 1892 resp.ZoneConfigs[target] = serverpb.DataDistributionResponse_ZoneConfig{ 1893 Target: target, 1894 Config: zcProto, 1895 ConfigSQL: string(zcSQL), 1896 } 1897 } 1898 1899 return resp, nil 1900 } 1901 1902 // EnqueueRange runs the specified range through the specified queue, returning 1903 // the detailed trace and error information from doing so. 
1904 func (s *adminServer) EnqueueRange( 1905 ctx context.Context, req *serverpb.EnqueueRangeRequest, 1906 ) (*serverpb.EnqueueRangeResponse, error) { 1907 if _, err := s.requireAdminUser(ctx); err != nil { 1908 return nil, err 1909 } 1910 1911 if !debug.GatewayRemoteAllowed(ctx, s.server.ClusterSettings()) { 1912 return nil, remoteDebuggingErr 1913 } 1914 1915 ctx = propagateGatewayMetadata(ctx) 1916 ctx = s.server.AnnotateCtx(ctx) 1917 1918 if req.NodeID < 0 { 1919 return nil, status.Errorf(codes.InvalidArgument, "node_id must be non-negative; got %d", req.NodeID) 1920 } 1921 if req.Queue == "" { 1922 return nil, status.Errorf(codes.InvalidArgument, "queue name must be non-empty") 1923 } 1924 if req.RangeID <= 0 { 1925 return nil, status.Errorf(codes.InvalidArgument, "range_id must be positive; got %d", req.RangeID) 1926 } 1927 1928 // If the request is targeted at this node, serve it directly. Otherwise, 1929 // forward it to the appropriate node(s). If no node was specified, forward 1930 // it to all nodes. 1931 if req.NodeID == s.server.NodeID() { 1932 return s.enqueueRangeLocal(ctx, req) 1933 } else if req.NodeID != 0 { 1934 admin, err := s.dialNode(ctx, req.NodeID) 1935 if err != nil { 1936 return nil, err 1937 } 1938 return admin.EnqueueRange(ctx, req) 1939 } 1940 1941 response := &serverpb.EnqueueRangeResponse{} 1942 1943 dialFn := func(ctx context.Context, nodeID roachpb.NodeID) (interface{}, error) { 1944 client, err := s.dialNode(ctx, nodeID) 1945 return client, err 1946 } 1947 nodeFn := func(ctx context.Context, client interface{}, nodeID roachpb.NodeID) (interface{}, error) { 1948 admin := client.(serverpb.AdminClient) 1949 req := *req 1950 req.NodeID = nodeID 1951 return admin.EnqueueRange(ctx, &req) 1952 } 1953 responseFn := func(_ roachpb.NodeID, nodeResp interface{}) { 1954 nodeDetails := nodeResp.(*serverpb.EnqueueRangeResponse) 1955 response.Details = append(response.Details, nodeDetails.Details...) 
1956 } 1957 errorFn := func(nodeID roachpb.NodeID, err error) { 1958 errDetail := &serverpb.EnqueueRangeResponse_Details{ 1959 NodeID: nodeID, 1960 Error: err.Error(), 1961 } 1962 response.Details = append(response.Details, errDetail) 1963 } 1964 1965 if err := contextutil.RunWithTimeout(ctx, "enqueue range", time.Minute, func(ctx context.Context) error { 1966 return s.server.status.iterateNodes( 1967 ctx, fmt.Sprintf("enqueue r%d in queue %s", req.RangeID, req.Queue), 1968 dialFn, nodeFn, responseFn, errorFn, 1969 ) 1970 }); err != nil { 1971 if len(response.Details) == 0 { 1972 return nil, err 1973 } 1974 response.Details = append(response.Details, &serverpb.EnqueueRangeResponse_Details{ 1975 Error: err.Error(), 1976 }) 1977 } 1978 1979 return response, nil 1980 } 1981 1982 // enqueueRangeLocal checks whether the local node has a replica for the 1983 // requested range that can be run through the queue, running it through the 1984 // queue and returning trace/error information if so. If not, returns an empty 1985 // response. 
1986 func (s *adminServer) enqueueRangeLocal( 1987 ctx context.Context, req *serverpb.EnqueueRangeRequest, 1988 ) (*serverpb.EnqueueRangeResponse, error) { 1989 response := &serverpb.EnqueueRangeResponse{ 1990 Details: []*serverpb.EnqueueRangeResponse_Details{ 1991 { 1992 NodeID: s.server.NodeID(), 1993 }, 1994 }, 1995 } 1996 1997 var store *kvserver.Store 1998 var repl *kvserver.Replica 1999 if err := s.server.node.stores.VisitStores(func(s *kvserver.Store) error { 2000 r, err := s.GetReplica(req.RangeID) 2001 if roachpb.IsRangeNotFoundError(err) { 2002 return nil 2003 } 2004 if err != nil { 2005 return err 2006 } 2007 repl = r 2008 store = s 2009 return nil 2010 }); err != nil { 2011 response.Details[0].Error = err.Error() 2012 return response, nil 2013 } 2014 2015 if store == nil || repl == nil { 2016 response.Details[0].Error = fmt.Sprintf("n%d has no replica for r%d", s.server.NodeID(), req.RangeID) 2017 return response, nil 2018 } 2019 2020 traceSpans, processErr, err := store.ManuallyEnqueue(ctx, req.Queue, repl, req.SkipShouldQueue) 2021 if err != nil { 2022 response.Details[0].Error = err.Error() 2023 return response, nil 2024 } 2025 response.Details[0].Events = recordedSpansToTraceEvents(traceSpans) 2026 if processErr != nil { 2027 response.Details[0].Error = processErr.Error() 2028 } 2029 return response, nil 2030 } 2031 2032 // sqlQuery allows you to incrementally build a SQL query that uses 2033 // placeholders. Instead of specific placeholders like $1, you instead use the 2034 // temporary placeholder $. 2035 type sqlQuery struct { 2036 buf bytes.Buffer 2037 pidx int 2038 qargs []interface{} 2039 errs []error 2040 } 2041 2042 func makeSQLQuery() *sqlQuery { 2043 res := &sqlQuery{} 2044 return res 2045 } 2046 2047 // String returns the full query. 
2048 func (q *sqlQuery) String() string { 2049 if len(q.errs) > 0 { 2050 return "couldn't generate query: please check Errors()" 2051 } 2052 return q.buf.String() 2053 } 2054 2055 // Errors returns a slice containing all errors that have happened during the 2056 // construction of this query. 2057 func (q *sqlQuery) Errors() []error { 2058 return q.errs 2059 } 2060 2061 // QueryArguments returns a filled map of placeholders containing all arguments 2062 // provided to this query through Append. 2063 func (q *sqlQuery) QueryArguments() []interface{} { 2064 return q.qargs 2065 } 2066 2067 // Append appends the provided string and any number of query parameters. 2068 // Instead of using normal placeholders (e.g. $1, $2), use meta-placeholder $. 2069 // This method rewrites the query so that it uses proper placeholders. 2070 // 2071 // For example, suppose we have the following calls: 2072 // 2073 // query.Append("SELECT * FROM foo WHERE a > $ AND a < $ ", arg1, arg2) 2074 // query.Append("LIMIT $", limit) 2075 // 2076 // The query is rewritten into: 2077 // 2078 // SELECT * FROM foo WHERE a > $1 AND a < $2 LIMIT $3 2079 // /* $1 = arg1, $2 = arg2, $3 = limit */ 2080 // 2081 // Note that this method does NOT return any errors. Instead, we queue up 2082 // errors, which can later be accessed. Returning an error here would make 2083 // query construction code exceedingly tedious. 2084 func (q *sqlQuery) Append(s string, params ...interface{}) { 2085 var placeholders int 2086 for _, r := range s { 2087 q.buf.WriteRune(r) 2088 if r == '$' { 2089 q.pidx++ 2090 placeholders++ 2091 q.buf.WriteString(strconv.Itoa(q.pidx)) // SQL placeholders are 1-based 2092 } 2093 } 2094 2095 if placeholders != len(params) { 2096 q.errs = append(q.errs, 2097 errors.Errorf("# of placeholders %d != # of params %d", placeholders, len(params))) 2098 } 2099 q.qargs = append(q.qargs, params...) 
2100 } 2101 2102 // resultScanner scans columns from sql.ResultRow instances into variables, 2103 // performing the appropriate casting and error detection along the way. 2104 type resultScanner struct { 2105 colNameToIdx map[string]int 2106 } 2107 2108 func makeResultScanner(cols []sqlbase.ResultColumn) resultScanner { 2109 rs := resultScanner{ 2110 colNameToIdx: make(map[string]int), 2111 } 2112 for i, col := range cols { 2113 rs.colNameToIdx[col.Name] = i 2114 } 2115 return rs 2116 } 2117 2118 // IsNull returns whether the specified column of the given row contains 2119 // a SQL NULL value. 2120 func (rs resultScanner) IsNull(row tree.Datums, col string) (bool, error) { 2121 idx, ok := rs.colNameToIdx[col] 2122 if !ok { 2123 return false, errors.Errorf("result is missing column %s", col) 2124 } 2125 return row[idx] == tree.DNull, nil 2126 } 2127 2128 // ScanIndex scans the given column index of the given row into dst. 2129 func (rs resultScanner) ScanIndex(row tree.Datums, index int, dst interface{}) error { 2130 src := row[index] 2131 2132 if dst == nil { 2133 return errors.Errorf("nil destination pointer passed in") 2134 } 2135 2136 switch d := dst.(type) { 2137 case *string: 2138 s, ok := tree.AsDString(src) 2139 if !ok { 2140 return errors.Errorf("source type assertion failed") 2141 } 2142 *d = string(s) 2143 2144 case **string: 2145 s, ok := tree.AsDString(src) 2146 if !ok { 2147 if src != tree.DNull { 2148 return errors.Errorf("source type assertion failed") 2149 } 2150 *d = nil 2151 break 2152 } 2153 val := string(s) 2154 *d = &val 2155 2156 case *bool: 2157 s, ok := src.(*tree.DBool) 2158 if !ok { 2159 return errors.Errorf("source type assertion failed") 2160 } 2161 *d = bool(*s) 2162 2163 case *float32: 2164 s, ok := src.(*tree.DFloat) 2165 if !ok { 2166 return errors.Errorf("source type assertion failed") 2167 } 2168 *d = float32(*s) 2169 2170 case **float32: 2171 s, ok := src.(*tree.DFloat) 2172 if !ok { 2173 if src != tree.DNull { 2174 return 
errors.Errorf("source type assertion failed") 2175 } 2176 *d = nil 2177 break 2178 } 2179 val := float32(*s) 2180 *d = &val 2181 2182 case *int64: 2183 s, ok := tree.AsDInt(src) 2184 if !ok { 2185 return errors.Errorf("source type assertion failed") 2186 } 2187 *d = int64(s) 2188 2189 case *[]sqlbase.ID: 2190 s, ok := tree.AsDArray(src) 2191 if !ok { 2192 return errors.Errorf("source type assertion failed") 2193 } 2194 for i := 0; i < s.Len(); i++ { 2195 id, ok := tree.AsDInt(s.Array[i]) 2196 if !ok { 2197 return errors.Errorf("source type assertion failed on index %d", i) 2198 } 2199 *d = append(*d, sqlbase.ID(id)) 2200 } 2201 2202 case *time.Time: 2203 s, ok := src.(*tree.DTimestamp) 2204 if !ok { 2205 return errors.Errorf("source type assertion failed") 2206 } 2207 *d = s.Time 2208 2209 // Passing a **time.Time instead of a *time.Time means the source is allowed 2210 // to be NULL, in which case nil is stored into *src. 2211 case **time.Time: 2212 s, ok := src.(*tree.DTimestamp) 2213 if !ok { 2214 if src != tree.DNull { 2215 return errors.Errorf("source type assertion failed") 2216 } 2217 *d = nil 2218 break 2219 } 2220 *d = &s.Time 2221 2222 case *[]byte: 2223 s, ok := src.(*tree.DBytes) 2224 if !ok { 2225 return errors.Errorf("source type assertion failed") 2226 } 2227 // Yes, this copies, but this probably isn't in the critical path. 2228 *d = []byte(*s) 2229 2230 case *apd.Decimal: 2231 s, ok := src.(*tree.DDecimal) 2232 if !ok { 2233 return errors.Errorf("source type assertion failed") 2234 } 2235 *d = s.Decimal 2236 2237 case **apd.Decimal: 2238 s, ok := src.(*tree.DDecimal) 2239 if !ok { 2240 if src != tree.DNull { 2241 return errors.Errorf("source type assertion failed") 2242 } 2243 *d = nil 2244 break 2245 } 2246 *d = &s.Decimal 2247 2248 default: 2249 return errors.Errorf("unimplemented type for scanCol: %T", dst) 2250 } 2251 2252 return nil 2253 } 2254 2255 // ScanAll scans all the columns from the given row, in order, into dsts. 
2256 func (rs resultScanner) ScanAll(row tree.Datums, dsts ...interface{}) error { 2257 if len(row) != len(dsts) { 2258 return fmt.Errorf( 2259 "ScanAll: row has %d columns but %d dests provided", len(row), len(dsts)) 2260 } 2261 for i := 0; i < len(row); i++ { 2262 if err := rs.ScanIndex(row, i, dsts[i]); err != nil { 2263 return err 2264 } 2265 } 2266 return nil 2267 } 2268 2269 // Scan scans the column with the given name from the given row into dst. 2270 func (rs resultScanner) Scan(row tree.Datums, colName string, dst interface{}) error { 2271 idx, ok := rs.colNameToIdx[colName] 2272 if !ok { 2273 return errors.Errorf("result is missing column %s", colName) 2274 } 2275 return rs.ScanIndex(row, idx, dst) 2276 } 2277 2278 // TODO(mrtracy): The following methods, used to look up the zone configuration 2279 // for a database or table, use the same algorithm as a set of methods in 2280 // cli/zone.go for the same purpose. However, as that code connects to the 2281 // server with a SQL connections, while this code uses the InternalExecutor, the 2282 // code cannot be commonized. 2283 // 2284 // queryZone retrieves the specific ZoneConfig associated with the supplied ID, 2285 // if it exists. 
2286 func (s *adminServer) queryZone( 2287 ctx context.Context, userName string, id sqlbase.ID, 2288 ) (zonepb.ZoneConfig, bool, error) { 2289 const query = `SELECT crdb_internal.get_zone_config($1)` 2290 rows, cols, err := s.server.sqlServer.internalExecutor.QueryWithCols( 2291 ctx, 2292 "admin-query-zone", 2293 nil, /* txn */ 2294 sqlbase.InternalExecutorSessionDataOverride{User: userName}, 2295 query, 2296 id, 2297 ) 2298 if err != nil { 2299 return *zonepb.NewZoneConfig(), false, err 2300 } 2301 2302 if len(rows) != 1 { 2303 return *zonepb.NewZoneConfig(), false, errors.Errorf("invalid number of rows returned: %s (%d)", query, id) 2304 } 2305 2306 var zoneBytes []byte 2307 scanner := makeResultScanner(cols) 2308 if isNull, err := scanner.IsNull(rows[0], cols[0].Name); err != nil { 2309 return *zonepb.NewZoneConfig(), false, err 2310 } else if isNull { 2311 return *zonepb.NewZoneConfig(), false, nil 2312 } 2313 2314 err = scanner.ScanIndex(rows[0], 0, &zoneBytes) 2315 if err != nil { 2316 return *zonepb.NewZoneConfig(), false, err 2317 } 2318 2319 var zone zonepb.ZoneConfig 2320 if err := protoutil.Unmarshal(zoneBytes, &zone); err != nil { 2321 return *zonepb.NewZoneConfig(), false, err 2322 } 2323 return zone, true, nil 2324 } 2325 2326 // queryZonePath queries a path of sql object IDs, as generated by 2327 // queryDescriptorIDPath(), for a ZoneConfig. It returns the most specific 2328 // ZoneConfig specified for the object IDs in the path. 2329 func (s *adminServer) queryZonePath( 2330 ctx context.Context, userName string, path []sqlbase.ID, 2331 ) (sqlbase.ID, zonepb.ZoneConfig, bool, error) { 2332 for i := len(path) - 1; i >= 0; i-- { 2333 zone, zoneExists, err := s.queryZone(ctx, userName, path[i]) 2334 if err != nil || zoneExists { 2335 return path[i], zone, true, err 2336 } 2337 } 2338 return 0, *zonepb.NewZoneConfig(), false, nil 2339 } 2340 2341 // queryNamespaceID queries for the ID of the namespace with the given name and 2342 // parent ID. 
2343 func (s *adminServer) queryNamespaceID( 2344 ctx context.Context, userName string, parentID sqlbase.ID, name string, 2345 ) (sqlbase.ID, error) { 2346 const query = `SELECT crdb_internal.get_namespace_id($1, $2)` 2347 rows, cols, err := s.server.sqlServer.internalExecutor.QueryWithCols( 2348 ctx, "admin-query-namespace-ID", nil, /* txn */ 2349 sqlbase.InternalExecutorSessionDataOverride{User: userName}, 2350 query, parentID, name, 2351 ) 2352 if err != nil { 2353 return 0, err 2354 } 2355 2356 if len(rows) != 1 { 2357 return 0, errors.Errorf("invalid number of rows returned: %s (%d, %s)", query, parentID, name) 2358 } 2359 2360 var id int64 2361 scanner := makeResultScanner(cols) 2362 if isNull, err := scanner.IsNull(rows[0], cols[0].Name); err != nil { 2363 return 0, err 2364 } else if isNull { 2365 return 0, errors.Errorf("namespace %s with ParentID %d not found", name, parentID) 2366 } 2367 2368 err = scanner.ScanIndex(rows[0], 0, &id) 2369 if err != nil { 2370 return 0, err 2371 } 2372 2373 return sqlbase.ID(id), nil 2374 } 2375 2376 // queryDescriptorIDPath converts a path of namespaces into a path of namespace 2377 // IDs. For example, if this function is called with a database/table name pair, 2378 // it will return a list of IDs consisting of the root namespace ID, the 2379 // databases ID, and the table ID (in that order). 
2380 func (s *adminServer) queryDescriptorIDPath( 2381 ctx context.Context, userName string, names []string, 2382 ) ([]sqlbase.ID, error) { 2383 path := []sqlbase.ID{keys.RootNamespaceID} 2384 for _, name := range names { 2385 id, err := s.queryNamespaceID(ctx, userName, path[len(path)-1], name) 2386 if err != nil { 2387 return nil, err 2388 } 2389 path = append(path, id) 2390 } 2391 return path, nil 2392 } 2393 2394 func (s *adminServer) dialNode( 2395 ctx context.Context, nodeID roachpb.NodeID, 2396 ) (serverpb.AdminClient, error) { 2397 addr, err := s.server.gossip.GetNodeIDAddress(nodeID) 2398 if err != nil { 2399 return nil, err 2400 } 2401 conn, err := s.server.rpcContext.GRPCDialNode( 2402 addr.String(), nodeID, rpc.DefaultClass).Connect(ctx) 2403 if err != nil { 2404 return nil, err 2405 } 2406 return serverpb.NewAdminClient(conn), nil 2407 } 2408 2409 func (s *adminServer) requireAdminUser(ctx context.Context) (userName string, err error) { 2410 userName, isAdmin, err := s.getUserAndRole(ctx) 2411 if err != nil { 2412 return "", err 2413 } 2414 if !isAdmin { 2415 return "", errInsufficientPrivilege 2416 } 2417 return userName, nil 2418 } 2419 2420 func (s *adminServer) getUserAndRole( 2421 ctx context.Context, 2422 ) (userName string, isAdmin bool, err error) { 2423 userName, err = userFromContext(ctx) 2424 if err != nil { 2425 return "", false, err 2426 } 2427 isAdmin, err = s.hasAdminRole(ctx, userName) 2428 return userName, isAdmin, err 2429 } 2430 2431 func (s *adminServer) hasAdminRole(ctx context.Context, sessionUser string) (bool, error) { 2432 if sessionUser == security.RootUser { 2433 // Shortcut. 
2434 return true, nil 2435 } 2436 rows, cols, err := s.server.sqlServer.internalExecutor.QueryWithCols( 2437 ctx, "check-is-admin", nil, /* txn */ 2438 sqlbase.InternalExecutorSessionDataOverride{User: sessionUser}, 2439 "SELECT crdb_internal.is_admin()") 2440 if err != nil { 2441 return false, err 2442 } 2443 if len(rows) != 1 || len(cols) != 1 { 2444 return false, errors.AssertionFailedf("hasAdminRole: expected 1 row, got %d", len(rows)) 2445 } 2446 dbDatum, ok := tree.AsDBool(rows[0][0]) 2447 if !ok { 2448 return false, errors.AssertionFailedf("hasAdminRole: expected bool, got %T", rows[0][0]) 2449 } 2450 return bool(dbDatum), nil 2451 } 2452 2453 var errInsufficientPrivilege = status.Error(codes.PermissionDenied, "this operation requires admin privilege")