github.com/goplus/llgo@v0.8.3/py/pandas/gen.go (about) 1 package pandas 2 3 import ( 4 _ "unsafe" 5 6 "github.com/goplus/llgo/py" 7 ) 8 9 const LLGoPackage = "py.pandas" 10 11 // Detect missing values for an array-like object. 12 // 13 // This function takes a scalar or array-like object and indicates 14 // whether values are missing (“NaN“ in numeric arrays, “None“ or “NaN“ 15 // in object arrays, “NaT“ in datetimelike). 16 // 17 // Parameters 18 // ---------- 19 // obj : scalar or array-like 20 // 21 // Object to check for null or missing values. 22 // 23 // Returns 24 // ------- 25 // bool or array-like of bool 26 // 27 // For scalar input, returns a scalar boolean. 28 // For array input, returns an array of boolean indicating whether each 29 // corresponding element is missing. 30 // 31 // See Also 32 // -------- 33 // notna : Boolean inverse of pandas.isna. 34 // Series.isna : Detect missing values in a Series. 35 // DataFrame.isna : Detect missing values in a DataFrame. 36 // Index.isna : Detect missing values in an Index. 37 // 38 // Examples 39 // -------- 40 // Scalar arguments (including strings) result in a scalar boolean. 41 // 42 // >>> pd.isna('dog') 43 // False 44 // 45 // >>> pd.isna(pd.NA) 46 // True 47 // 48 // >>> pd.isna(np.nan) 49 // True 50 // 51 // ndarrays result in an ndarray of booleans. 52 // 53 // >>> array = np.array([[1, np.nan, 3], [4, 5, np.nan]]) 54 // >>> array 55 // array([[ 1., nan, 3.], 56 // 57 // [ 4., 5., nan]]) 58 // 59 // >>> pd.isna(array) 60 // array([[False, True, False], 61 // 62 // [False, False, True]]) 63 // 64 // For indexes, an ndarray of booleans is returned. 65 // 66 // >>> index = pd.DatetimeIndex(["2017-07-05", "2017-07-06", None, 67 // ... "2017-07-08"]) 68 // >>> index 69 // DatetimeIndex(['2017-07-05', '2017-07-06', 'NaT', '2017-07-08'], 70 // 71 // dtype='datetime64[ns]', freq=None) 72 // 73 // >>> pd.isna(index) 74 // array([False, False, True, False]) 75 // 76 // For Series and DataFrame, the same type is returned, containing booleans. 77 // 78 // >>> df = pd.DataFrame([['ant', 'bee', 'cat'], ['dog', None, 'fly']]) 79 // >>> df 80 // 81 // 0 1 2 82 // 83 // 0 ant bee cat 84 // 1 dog None fly 85 // >>> pd.isna(df) 86 // 87 // 0 1 2 88 // 89 // 0 False False False 90 // 1 False True False 91 // 92 // >>> pd.isna(df[1]) 93 // 0 False 94 // 1 True 95 // Name: 1, dtype: bool 96 // 97 //go:linkname Isna py.isna 98 func Isna(obj *py.Object) *py.Object 99 100 // Detect missing values for an array-like object. 101 // 102 // This function takes a scalar or array-like object and indicates 103 // whether values are missing (“NaN“ in numeric arrays, “None“ or “NaN“ 104 // in object arrays, “NaT“ in datetimelike). 105 // 106 // Parameters 107 // ---------- 108 // obj : scalar or array-like 109 // 110 // Object to check for null or missing values. 111 // 112 // Returns 113 // ------- 114 // bool or array-like of bool 115 // 116 // For scalar input, returns a scalar boolean. 117 // For array input, returns an array of boolean indicating whether each 118 // corresponding element is missing. 119 // 120 // See Also 121 // -------- 122 // notna : Boolean inverse of pandas.isna. 123 // Series.isna : Detect missing values in a Series. 124 // DataFrame.isna : Detect missing values in a DataFrame. 125 // Index.isna : Detect missing values in an Index. 126 // 127 // Examples 128 // -------- 129 // Scalar arguments (including strings) result in a scalar boolean. 
130 // 131 // >>> pd.isna('dog') 132 // False 133 // 134 // >>> pd.isna(pd.NA) 135 // True 136 // 137 // >>> pd.isna(np.nan) 138 // True 139 // 140 // ndarrays result in an ndarray of booleans. 141 // 142 // >>> array = np.array([[1, np.nan, 3], [4, 5, np.nan]]) 143 // >>> array 144 // array([[ 1., nan, 3.], 145 // 146 // [ 4., 5., nan]]) 147 // 148 // >>> pd.isna(array) 149 // array([[False, True, False], 150 // 151 // [False, False, True]]) 152 // 153 // For indexes, an ndarray of booleans is returned. 154 // 155 // >>> index = pd.DatetimeIndex(["2017-07-05", "2017-07-06", None, 156 // ... "2017-07-08"]) 157 // >>> index 158 // DatetimeIndex(['2017-07-05', '2017-07-06', 'NaT', '2017-07-08'], 159 // 160 // dtype='datetime64[ns]', freq=None) 161 // 162 // >>> pd.isna(index) 163 // array([False, False, True, False]) 164 // 165 // For Series and DataFrame, the same type is returned, containing booleans. 166 // 167 // >>> df = pd.DataFrame([['ant', 'bee', 'cat'], ['dog', None, 'fly']]) 168 // >>> df 169 // 170 // 0 1 2 171 // 172 // 0 ant bee cat 173 // 1 dog None fly 174 // >>> pd.isna(df) 175 // 176 // 0 1 2 177 // 178 // 0 False False False 179 // 1 False True False 180 // 181 // >>> pd.isna(df[1]) 182 // 0 False 183 // 1 True 184 // Name: 1, dtype: bool 185 // 186 //go:linkname Isnull py.isnull 187 func Isnull(obj *py.Object) *py.Object 188 189 // Detect non-missing values for an array-like object. 190 // 191 // This function takes a scalar or array-like object and indicates 192 // whether values are valid (not missing, which is “NaN“ in numeric 193 // arrays, “None“ or “NaN“ in object arrays, “NaT“ in datetimelike). 194 // 195 // Parameters 196 // ---------- 197 // obj : array-like or object value 198 // 199 // Object to check for *not* null or *non*-missing values. 200 // 201 // Returns 202 // ------- 203 // bool or array-like of bool 204 // 205 // For scalar input, returns a scalar boolean. 206 // For array input, returns an array of boolean indicating whether each 207 // corresponding element is valid. 208 // 209 // See Also 210 // -------- 211 // isna : Boolean inverse of pandas.notna. 212 // Series.notna : Detect valid values in a Series. 213 // DataFrame.notna : Detect valid values in a DataFrame. 214 // Index.notna : Detect valid values in an Index. 215 // 216 // Examples 217 // -------- 218 // Scalar arguments (including strings) result in a scalar boolean. 219 // 220 // >>> pd.notna('dog') 221 // True 222 // 223 // >>> pd.notna(pd.NA) 224 // False 225 // 226 // >>> pd.notna(np.nan) 227 // False 228 // 229 // ndarrays result in an ndarray of booleans. 230 // 231 // >>> array = np.array([[1, np.nan, 3], [4, 5, np.nan]]) 232 // >>> array 233 // array([[ 1., nan, 3.], 234 // 235 // [ 4., 5., nan]]) 236 // 237 // >>> pd.notna(array) 238 // array([[ True, False, True], 239 // 240 // [ True, True, False]]) 241 // 242 // For indexes, an ndarray of booleans is returned. 243 // 244 // >>> index = pd.DatetimeIndex(["2017-07-05", "2017-07-06", None, 245 // ... "2017-07-08"]) 246 // >>> index 247 // DatetimeIndex(['2017-07-05', '2017-07-06', 'NaT', '2017-07-08'], 248 // 249 // dtype='datetime64[ns]', freq=None) 250 // 251 // >>> pd.notna(index) 252 // array([ True, True, False, True]) 253 // 254 // For Series and DataFrame, the same type is returned, containing booleans. 
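// Usage sketch for the Isna and Isnull bindings declared above; this is
// not part of the generated code. Isnull is the same pandas function under
// a second name, so the two calls below are interchangeable. py.Float is
// the scalar constructor used in llgo's own examples; any other *py.Object
// (ndarray, Series, DataFrame) can be passed the same way. How to read the
// returned Python bool back into Go depends on the py package version and
// is left out here.
func exampleIsna(values *py.Object) *py.Object {
	_ = Isnull(py.Float(2.5)) // pd.isnull(2.5) -> False (scalar case)
	return Isna(values)       // pd.isna(values): elementwise for array-like input
}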
255 // 256 // >>> df = pd.DataFrame([['ant', 'bee', 'cat'], ['dog', None, 'fly']]) 257 // >>> df 258 // 259 // 0 1 2 260 // 261 // 0 ant bee cat 262 // 1 dog None fly 263 // >>> pd.notna(df) 264 // 265 // 0 1 2 266 // 267 // 0 True True True 268 // 1 True False True 269 // 270 // >>> pd.notna(df[1]) 271 // 0 True 272 // 1 False 273 // Name: 1, dtype: bool 274 // 275 //go:linkname Notna py.notna 276 func Notna(obj *py.Object) *py.Object 277 278 // Detect non-missing values for an array-like object. 279 // 280 // This function takes a scalar or array-like object and indicates 281 // whether values are valid (not missing, which is “NaN“ in numeric 282 // arrays, “None“ or “NaN“ in object arrays, “NaT“ in datetimelike). 283 // 284 // Parameters 285 // ---------- 286 // obj : array-like or object value 287 // 288 // Object to check for *not* null or *non*-missing values. 289 // 290 // Returns 291 // ------- 292 // bool or array-like of bool 293 // 294 // For scalar input, returns a scalar boolean. 295 // For array input, returns an array of boolean indicating whether each 296 // corresponding element is valid. 297 // 298 // See Also 299 // -------- 300 // isna : Boolean inverse of pandas.notna. 301 // Series.notna : Detect valid values in a Series. 302 // DataFrame.notna : Detect valid values in a DataFrame. 303 // Index.notna : Detect valid values in an Index. 304 // 305 // Examples 306 // -------- 307 // Scalar arguments (including strings) result in a scalar boolean. 308 // 309 // >>> pd.notna('dog') 310 // True 311 // 312 // >>> pd.notna(pd.NA) 313 // False 314 // 315 // >>> pd.notna(np.nan) 316 // False 317 // 318 // ndarrays result in an ndarray of booleans. 319 // 320 // >>> array = np.array([[1, np.nan, 3], [4, 5, np.nan]]) 321 // >>> array 322 // array([[ 1., nan, 3.], 323 // 324 // [ 4., 5., nan]]) 325 // 326 // >>> pd.notna(array) 327 // array([[ True, False, True], 328 // 329 // [ True, True, False]]) 330 // 331 // For indexes, an ndarray of booleans is returned. 332 // 333 // >>> index = pd.DatetimeIndex(["2017-07-05", "2017-07-06", None, 334 // ... "2017-07-08"]) 335 // >>> index 336 // DatetimeIndex(['2017-07-05', '2017-07-06', 'NaT', '2017-07-08'], 337 // 338 // dtype='datetime64[ns]', freq=None) 339 // 340 // >>> pd.notna(index) 341 // array([ True, True, False, True]) 342 // 343 // For Series and DataFrame, the same type is returned, containing booleans. 344 // 345 // >>> df = pd.DataFrame([['ant', 'bee', 'cat'], ['dog', None, 'fly']]) 346 // >>> df 347 // 348 // 0 1 2 349 // 350 // 0 ant bee cat 351 // 1 dog None fly 352 // >>> pd.notna(df) 353 // 354 // 0 1 2 355 // 356 // 0 True True True 357 // 1 True False True 358 // 359 // >>> pd.notna(df[1]) 360 // 0 True 361 // 1 False 362 // Name: 1, dtype: bool 363 // 364 //go:linkname Notnull py.notnull 365 func Notnull(obj *py.Object) *py.Object 366 367 // Return a fixed frequency PeriodIndex. 368 // 369 // The day (calendar) is the default frequency. 370 // 371 // Parameters 372 // ---------- 373 // start : str, datetime, date, pandas.Timestamp, or period-like, default None 374 // 375 // Left bound for generating periods. 376 // 377 // end : str, datetime, date, pandas.Timestamp, or period-like, default None 378 // 379 // Right bound for generating periods. 380 // 381 // periods : int, default None 382 // 383 // Number of periods to generate. 384 // 385 // freq : str or DateOffset, optional 386 // 387 // Frequency alias. By default the freq is taken from `start` or `end` 388 // if those are Period objects. 
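// Usage sketch for the Notna and Notnull bindings declared above; not part
// of the generated code. Notna is the elementwise boolean inverse of Isna,
// and Notnull is an alias for Notna, mirroring pandas. The argument is any
// *py.Object already held by the caller (scalar, ndarray, Series, ...).
func exampleNotna(values *py.Object) (valid, missing *py.Object) {
	valid = Notna(values)  // pd.notna(values)
	missing = Isna(values) // pd.isna(values), the inverse mask
	return valid, missing
}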
Otherwise, the default is ``"D"`` for 389 // daily frequency. 390 // 391 // name : str, default None 392 // 393 // Name of the resulting PeriodIndex. 394 // 395 // Returns 396 // ------- 397 // PeriodIndex 398 // 399 // Notes 400 // ----- 401 // Of the three parameters: “start“, “end“, and “periods“, exactly two 402 // must be specified. 403 // 404 // To learn more about the frequency strings, please see `this link 405 // <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__. 406 // 407 // Examples 408 // -------- 409 // >>> pd.period_range(start='2017-01-01', end='2018-01-01', freq='M') 410 // PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', '2017-06', 411 // 412 // '2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12', 413 // '2018-01'], 414 // dtype='period[M]') 415 // 416 // If “start“ or “end“ are “Period“ objects, they will be used as anchor 417 // endpoints for a “PeriodIndex“ with frequency matching that of the 418 // “period_range“ constructor. 419 // 420 // >>> pd.period_range(start=pd.Period('2017Q1', freq='Q'), 421 // ... end=pd.Period('2017Q2', freq='Q'), freq='M') 422 // PeriodIndex(['2017-03', '2017-04', '2017-05', '2017-06'], 423 // 424 // dtype='period[M]') 425 // 426 //go:linkname PeriodRange py.period_range 427 func PeriodRange(start *py.Object, end *py.Object, periods *py.Object, freq *py.Object, name *py.Object) *py.Object 428 429 // Return a fixed frequency TimedeltaIndex with day as the default. 430 // 431 // Parameters 432 // ---------- 433 // start : str or timedelta-like, default None 434 // 435 // Left bound for generating timedeltas. 436 // 437 // end : str or timedelta-like, default None 438 // 439 // Right bound for generating timedeltas. 440 // 441 // periods : int, default None 442 // 443 // Number of periods to generate. 444 // 445 // freq : str, Timedelta, datetime.timedelta, or DateOffset, default 'D' 446 // 447 // Frequency strings can have multiples, e.g. '5h'. 448 // 449 // name : str, default None 450 // 451 // Name of the resulting TimedeltaIndex. 452 // 453 // closed : str, default None 454 // 455 // Make the interval closed with respect to the given frequency to 456 // the 'left', 'right', or both sides (None). 457 // 458 // unit : str, default None 459 // 460 // Specify the desired resolution of the result. 461 // 462 // .. versionadded:: 2.0.0 463 // 464 // Returns 465 // ------- 466 // TimedeltaIndex 467 // 468 // Notes 469 // ----- 470 // Of the four parameters “start“, “end“, “periods“, and “freq“, 471 // exactly three must be specified. If “freq“ is omitted, the resulting 472 // “TimedeltaIndex“ will have “periods“ linearly spaced elements between 473 // “start“ and “end“ (closed on both sides). 474 // 475 // To learn more about the frequency strings, please see `this link 476 // <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__. 477 // 478 // Examples 479 // -------- 480 // >>> pd.timedelta_range(start='1 day', periods=4) 481 // TimedeltaIndex(['1 days', '2 days', '3 days', '4 days'], 482 // 483 // dtype='timedelta64[ns]', freq='D') 484 // 485 // The “closed“ parameter specifies which endpoint is included. The default 486 // behavior is to include both endpoints. 487 // 488 // >>> pd.timedelta_range(start='1 day', periods=4, closed='right') 489 // TimedeltaIndex(['2 days', '3 days', '4 days'], 490 // 491 // dtype='timedelta64[ns]', freq='D') 492 // 493 // The “freq“ parameter specifies the frequency of the TimedeltaIndex. 
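// Usage sketch for the PeriodRange binding declared above; not part of the
// generated code. The Python keyword arguments are flattened into the
// positional order (start, end, periods, freq, name). start, end and freq
// are Python str (or Period) objects built by the caller; nil is assumed
// here to stand for a keyword left at its Python default, which this file
// does not document, so verify that against your llgo version.
func examplePeriodRange(start, end, freq *py.Object) *py.Object {
	// pd.period_range(start='2017-01-01', end='2018-01-01', freq='M')
	return PeriodRange(start, end, nil, freq, nil)
}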
494 // Only fixed frequencies can be passed, non-fixed frequencies such as 495 // 'M' (month end) will raise. 496 // 497 // >>> pd.timedelta_range(start='1 day', end='2 days', freq='6h') 498 // TimedeltaIndex(['1 days 00:00:00', '1 days 06:00:00', '1 days 12:00:00', 499 // 500 // '1 days 18:00:00', '2 days 00:00:00'], 501 // dtype='timedelta64[ns]', freq='6h') 502 // 503 // Specify “start“, “end“, and “periods“; the frequency is generated 504 // automatically (linearly spaced). 505 // 506 // >>> pd.timedelta_range(start='1 day', end='5 days', periods=4) 507 // TimedeltaIndex(['1 days 00:00:00', '2 days 08:00:00', '3 days 16:00:00', 508 // 509 // '5 days 00:00:00'], 510 // dtype='timedelta64[ns]', freq=None) 511 // 512 // **Specify a unit** 513 // 514 // >>> pd.timedelta_range("1 Day", periods=3, freq="100000D", unit="s") 515 // TimedeltaIndex(['1 days', '100001 days', '200001 days'], 516 // 517 // dtype='timedelta64[s]', freq='100000D') 518 // 519 //go:linkname TimedeltaRange py.timedelta_range 520 func TimedeltaRange(start *py.Object, end *py.Object, periods *py.Object, freq *py.Object, name *py.Object, closed *py.Object) *py.Object 521 522 // Return a fixed frequency DatetimeIndex. 523 // 524 // Returns the range of equally spaced time points (where the difference between any 525 // two adjacent points is specified by the given frequency) such that they all 526 // satisfy `start <[=] x <[=] end`, where the first one and the last one are, resp., 527 // the first and last time points in that range that fall on the boundary of “freq“ 528 // (if given as a frequency string) or that are valid for “freq“ (if given as a 529 // :class:`pandas.tseries.offsets.DateOffset`). (If exactly one of “start“, 530 // “end“, or “freq“ is *not* specified, this missing parameter can be computed 531 // given “periods“, the number of timesteps in the range. See the note below.) 532 // 533 // Parameters 534 // ---------- 535 // start : str or datetime-like, optional 536 // 537 // Left bound for generating dates. 538 // 539 // end : str or datetime-like, optional 540 // 541 // Right bound for generating dates. 542 // 543 // periods : int, optional 544 // 545 // Number of periods to generate. 546 // 547 // freq : str, Timedelta, datetime.timedelta, or DateOffset, default 'D' 548 // 549 // Frequency strings can have multiples, e.g. '5h'. See 550 // :ref:`here <timeseries.offset_aliases>` for a list of 551 // frequency aliases. 552 // 553 // tz : str or tzinfo, optional 554 // 555 // Time zone name for returning localized DatetimeIndex, for example 556 // 'Asia/Hong_Kong'. By default, the resulting DatetimeIndex is 557 // timezone-naive unless timezone-aware datetime-likes are passed. 558 // 559 // normalize : bool, default False 560 // 561 // Normalize start/end dates to midnight before generating date range. 562 // 563 // name : str, default None 564 // 565 // Name of the resulting DatetimeIndex. 566 // 567 // inclusive : {"both", "neither", "left", "right"}, default "both" 568 // 569 // Include boundaries; Whether to set each bound as closed or open. 570 // 571 // .. versionadded:: 1.4.0 572 // 573 // unit : str, default None 574 // 575 // Specify the desired resolution of the result. 576 // 577 // .. versionadded:: 2.0.0 578 // 579 // **kwargs 580 // 581 // For compatibility. Has no effect on the result. 582 // 583 // Returns 584 // ------- 585 // DatetimeIndex 586 // 587 // See Also 588 // -------- 589 // DatetimeIndex : An immutable container for datetimes. 
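// Usage sketch for the TimedeltaRange binding declared above; not part of
// the generated code. The generated signature is (start, end, periods,
// freq, name, closed); note that the docstring also describes a `unit`
// keyword that this particular binding does not expose. As elsewhere, nil
// is assumed to mean "leave this keyword at its Python default".
func exampleTimedeltaRange(start, periods *py.Object) *py.Object {
	// pd.timedelta_range(start='1 day', periods=4)
	return TimedeltaRange(start, nil, periods, nil, nil, nil)
}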
590 // timedelta_range : Return a fixed frequency TimedeltaIndex. 591 // period_range : Return a fixed frequency PeriodIndex. 592 // interval_range : Return a fixed frequency IntervalIndex. 593 // 594 // Notes 595 // ----- 596 // Of the four parameters “start“, “end“, “periods“, and “freq“, 597 // exactly three must be specified. If “freq“ is omitted, the resulting 598 // “DatetimeIndex“ will have “periods“ linearly spaced elements between 599 // “start“ and “end“ (closed on both sides). 600 // 601 // To learn more about the frequency strings, please see `this link 602 // <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__. 603 // 604 // Examples 605 // -------- 606 // **Specifying the values** 607 // 608 // The next four examples generate the same `DatetimeIndex`, but vary 609 // the combination of `start`, `end` and `periods`. 610 // 611 // Specify `start` and `end`, with the default daily frequency. 612 // 613 // >>> pd.date_range(start='1/1/2018', end='1/08/2018') 614 // DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04', 615 // 616 // '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08'], 617 // dtype='datetime64[ns]', freq='D') 618 // 619 // Specify timezone-aware `start` and `end`, with the default daily frequency. 620 // 621 // >>> pd.date_range( 622 // ... start=pd.to_datetime("1/1/2018").tz_localize("Europe/Berlin"), 623 // ... end=pd.to_datetime("1/08/2018").tz_localize("Europe/Berlin"), 624 // ... ) 625 // DatetimeIndex(['2018-01-01 00:00:00+01:00', '2018-01-02 00:00:00+01:00', 626 // 627 // '2018-01-03 00:00:00+01:00', '2018-01-04 00:00:00+01:00', 628 // '2018-01-05 00:00:00+01:00', '2018-01-06 00:00:00+01:00', 629 // '2018-01-07 00:00:00+01:00', '2018-01-08 00:00:00+01:00'], 630 // dtype='datetime64[ns, Europe/Berlin]', freq='D') 631 // 632 // Specify `start` and `periods`, the number of periods (days). 633 // 634 // >>> pd.date_range(start='1/1/2018', periods=8) 635 // DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04', 636 // 637 // '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08'], 638 // dtype='datetime64[ns]', freq='D') 639 // 640 // Specify `end` and `periods`, the number of periods (days). 641 // 642 // >>> pd.date_range(end='1/1/2018', periods=8) 643 // DatetimeIndex(['2017-12-25', '2017-12-26', '2017-12-27', '2017-12-28', 644 // 645 // '2017-12-29', '2017-12-30', '2017-12-31', '2018-01-01'], 646 // dtype='datetime64[ns]', freq='D') 647 // 648 // Specify `start`, `end`, and `periods`; the frequency is generated 649 // automatically (linearly spaced). 650 // 651 // >>> pd.date_range(start='2018-04-24', end='2018-04-27', periods=3) 652 // DatetimeIndex(['2018-04-24 00:00:00', '2018-04-25 12:00:00', 653 // 654 // '2018-04-27 00:00:00'], 655 // dtype='datetime64[ns]', freq=None) 656 // 657 // **Other Parameters** 658 // 659 // Changed the `freq` (frequency) to “'ME'“ (month end frequency). 660 // 661 // >>> pd.date_range(start='1/1/2018', periods=5, freq='ME') 662 // DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31', '2018-04-30', 663 // 664 // '2018-05-31'], 665 // dtype='datetime64[ns]', freq='ME') 666 // 667 // # Multiples are allowed 668 // 669 // >>> pd.date_range(start='1/1/2018', periods=5, freq='3ME') 670 // DatetimeIndex(['2018-01-31', '2018-04-30', '2018-07-31', '2018-10-31', 671 // 672 // '2019-01-31'], 673 // dtype='datetime64[ns]', freq='3ME') 674 // 675 // `freq` can also be specified as an Offset object. 
676 // 677 // >>> pd.date_range(start='1/1/2018', periods=5, freq=pd.offsets.MonthEnd(3)) 678 // DatetimeIndex(['2018-01-31', '2018-04-30', '2018-07-31', '2018-10-31', 679 // 680 // '2019-01-31'], 681 // dtype='datetime64[ns]', freq='3ME') 682 // 683 // Specify `tz` to set the timezone. 684 // 685 // >>> pd.date_range(start='1/1/2018', periods=5, tz='Asia/Tokyo') 686 // DatetimeIndex(['2018-01-01 00:00:00+09:00', '2018-01-02 00:00:00+09:00', 687 // 688 // '2018-01-03 00:00:00+09:00', '2018-01-04 00:00:00+09:00', 689 // '2018-01-05 00:00:00+09:00'], 690 // dtype='datetime64[ns, Asia/Tokyo]', freq='D') 691 // 692 // `inclusive` controls whether to include `start` and `end` that are on the 693 // boundary. The default, "both", includes boundary points on either end. 694 // 695 // >>> pd.date_range(start='2017-01-01', end='2017-01-04', inclusive="both") 696 // DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04'], 697 // 698 // dtype='datetime64[ns]', freq='D') 699 // 700 // Use “inclusive='left'“ to exclude `end` if it falls on the boundary. 701 // 702 // >>> pd.date_range(start='2017-01-01', end='2017-01-04', inclusive='left') 703 // DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03'], 704 // 705 // dtype='datetime64[ns]', freq='D') 706 // 707 // Use “inclusive='right'“ to exclude `start` if it falls on the boundary, and 708 // similarly “inclusive='neither'“ will exclude both `start` and `end`. 709 // 710 // >>> pd.date_range(start='2017-01-01', end='2017-01-04', inclusive='right') 711 // DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04'], 712 // 713 // dtype='datetime64[ns]', freq='D') 714 // 715 // **Specify a unit** 716 // 717 // >>> pd.date_range(start="2017-01-01", periods=10, freq="100YS", unit="s") 718 // DatetimeIndex(['2017-01-01', '2117-01-01', '2217-01-01', '2317-01-01', 719 // 720 // '2417-01-01', '2517-01-01', '2617-01-01', '2717-01-01', 721 // '2817-01-01', '2917-01-01'], 722 // dtype='datetime64[s]', freq='100YS-JAN') 723 // 724 //go:linkname DateRange py.date_range 725 func DateRange(start *py.Object, end *py.Object, periods *py.Object, freq *py.Object, tz *py.Object, normalize *py.Object, name *py.Object, inclusive *py.Object) *py.Object 726 727 // Return a fixed frequency DatetimeIndex with business day as the default. 728 // 729 // Parameters 730 // ---------- 731 // start : str or datetime-like, default None 732 // 733 // Left bound for generating dates. 734 // 735 // end : str or datetime-like, default None 736 // 737 // Right bound for generating dates. 738 // 739 // periods : int, default None 740 // 741 // Number of periods to generate. 742 // 743 // freq : str, Timedelta, datetime.timedelta, or DateOffset, default 'B' 744 // 745 // Frequency strings can have multiples, e.g. '5h'. The default is 746 // business daily ('B'). 747 // 748 // tz : str or None 749 // 750 // Time zone name for returning localized DatetimeIndex, for example 751 // Asia/Beijing. 752 // 753 // normalize : bool, default False 754 // 755 // Normalize start/end dates to midnight before generating date range. 756 // 757 // name : str, default None 758 // 759 // Name of the resulting DatetimeIndex. 760 // 761 // weekmask : str or None, default None 762 // 763 // Weekmask of valid business days, passed to ``numpy.busdaycalendar``, 764 // only used when custom frequency strings are passed. The default 765 // value None is equivalent to 'Mon Tue Wed Thu Fri'. 
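// Usage sketch for the DateRange binding declared above; not part of the
// generated code. The keywords are flattened to (start, end, periods, freq,
// tz, normalize, name, inclusive); the `unit` and **kwargs parameters
// described in the docstring are not part of this generated signature.
// start and periods are Python objects built by the caller; nil is assumed
// to select the Python default for the remaining keywords.
func exampleDateRange(start, periods *py.Object) *py.Object {
	// pd.date_range(start='1/1/2018', periods=8), daily frequency
	return DateRange(start, nil, periods, nil, nil, nil, nil, nil)
}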
766 // 767 // holidays : list-like or None, default None 768 // 769 // Dates to exclude from the set of valid business days, passed to 770 // ``numpy.busdaycalendar``, only used when custom frequency strings 771 // are passed. 772 // 773 // inclusive : {"both", "neither", "left", "right"}, default "both" 774 // 775 // Include boundaries; Whether to set each bound as closed or open. 776 // 777 // .. versionadded:: 1.4.0 778 // 779 // **kwargs 780 // 781 // For compatibility. Has no effect on the result. 782 // 783 // Returns 784 // ------- 785 // DatetimeIndex 786 // 787 // Notes 788 // ----- 789 // Of the four parameters: “start“, “end“, “periods“, and “freq“, 790 // exactly three must be specified. Specifying “freq“ is a requirement 791 // for “bdate_range“. Use “date_range“ if specifying “freq“ is not 792 // desired. 793 // 794 // To learn more about the frequency strings, please see `this link 795 // <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__. 796 // 797 // Examples 798 // -------- 799 // Note how the two weekend days are skipped in the result. 800 // 801 // >>> pd.bdate_range(start='1/1/2018', end='1/08/2018') 802 // DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04', 803 // 804 // '2018-01-05', '2018-01-08'], 805 // dtype='datetime64[ns]', freq='B') 806 // 807 //go:linkname BdateRange py.bdate_range 808 func BdateRange(start *py.Object, end *py.Object, periods *py.Object, freq *py.Object, tz *py.Object, normalize *py.Object, name *py.Object, weekmask *py.Object, holidays *py.Object, inclusive *py.Object) *py.Object 809 810 // Return a fixed frequency IntervalIndex. 811 // 812 // Parameters 813 // ---------- 814 // start : numeric or datetime-like, default None 815 // 816 // Left bound for generating intervals. 817 // 818 // end : numeric or datetime-like, default None 819 // 820 // Right bound for generating intervals. 821 // 822 // periods : int, default None 823 // 824 // Number of periods to generate. 825 // 826 // freq : numeric, str, Timedelta, datetime.timedelta, or DateOffset, default None 827 // 828 // The length of each interval. Must be consistent with the type of start 829 // and end, e.g. 2 for numeric, or '5H' for datetime-like. Default is 1 830 // for numeric and 'D' for datetime-like. 831 // 832 // name : str, default None 833 // 834 // Name of the resulting IntervalIndex. 835 // 836 // closed : {'left', 'right', 'both', 'neither'}, default 'right' 837 // 838 // Whether the intervals are closed on the left-side, right-side, both 839 // or neither. 840 // 841 // Returns 842 // ------- 843 // IntervalIndex 844 // 845 // See Also 846 // -------- 847 // IntervalIndex : An Index of intervals that are all closed on the same side. 848 // 849 // Notes 850 // ----- 851 // Of the four parameters “start“, “end“, “periods“, and “freq“, 852 // exactly three must be specified. If “freq“ is omitted, the resulting 853 // “IntervalIndex“ will have “periods“ linearly spaced elements between 854 // “start“ and “end“, inclusively. 855 // 856 // To learn more about datetime-like frequency strings, please see `this link 857 // <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__. 858 // 859 // Examples 860 // -------- 861 // Numeric “start“ and “end“ is supported. 
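// Usage sketch for the BdateRange binding declared above; not part of the
// generated code. Keywords are flattened to (start, end, periods, freq, tz,
// normalize, name, weekmask, holidays, inclusive). nil is assumed to leave
// a keyword at its Python default, so this reproduces the weekend-skipping
// doctest with the default business-day frequency 'B'.
func exampleBdateRange(start, end *py.Object) *py.Object {
	// pd.bdate_range(start='1/1/2018', end='1/08/2018')
	return BdateRange(start, end, nil, nil, nil, nil, nil, nil, nil, nil)
}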
862 // 863 // >>> pd.interval_range(start=0, end=5) 864 // IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]], 865 // 866 // dtype='interval[int64, right]') 867 // 868 // Additionally, datetime-like input is also supported. 869 // 870 // >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), 871 // ... end=pd.Timestamp('2017-01-04')) 872 // IntervalIndex([(2017-01-01 00:00:00, 2017-01-02 00:00:00], 873 // 874 // (2017-01-02 00:00:00, 2017-01-03 00:00:00], 875 // (2017-01-03 00:00:00, 2017-01-04 00:00:00]], 876 // dtype='interval[datetime64[ns], right]') 877 // 878 // The “freq“ parameter specifies the frequency between the left and right. 879 // endpoints of the individual intervals within the “IntervalIndex“. For 880 // numeric “start“ and “end“, the frequency must also be numeric. 881 // 882 // >>> pd.interval_range(start=0, periods=4, freq=1.5) 883 // IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]], 884 // 885 // dtype='interval[float64, right]') 886 // 887 // Similarly, for datetime-like “start“ and “end“, the frequency must be 888 // convertible to a DateOffset. 889 // 890 // >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), 891 // ... periods=3, freq='MS') 892 // IntervalIndex([(2017-01-01 00:00:00, 2017-02-01 00:00:00], 893 // 894 // (2017-02-01 00:00:00, 2017-03-01 00:00:00], 895 // (2017-03-01 00:00:00, 2017-04-01 00:00:00]], 896 // dtype='interval[datetime64[ns], right]') 897 // 898 // Specify “start“, “end“, and “periods“; the frequency is generated 899 // automatically (linearly spaced). 900 // 901 // >>> pd.interval_range(start=0, end=6, periods=4) 902 // IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]], 903 // 904 // dtype='interval[float64, right]') 905 // 906 // The “closed“ parameter specifies which endpoints of the individual 907 // intervals within the “IntervalIndex“ are closed. 908 // 909 // >>> pd.interval_range(end=5, periods=4, closed='both') 910 // IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]], 911 // 912 // dtype='interval[int64, both]') 913 // 914 //go:linkname IntervalRange py.interval_range 915 func IntervalRange(start *py.Object, end *py.Object, periods *py.Object, freq *py.Object, name *py.Object, closed *py.Object) *py.Object 916 917 // Convert argument to a numeric type. 918 // 919 // The default return dtype is `float64` or `int64` 920 // depending on the data supplied. Use the `downcast` parameter 921 // to obtain other dtypes. 922 // 923 // Please note that precision loss may occur if really large numbers 924 // are passed in. Due to the internal limitations of `ndarray`, if 925 // numbers smaller than `-9223372036854775808` (np.iinfo(np.int64).min) 926 // or larger than `18446744073709551615` (np.iinfo(np.uint64).max) are 927 // passed in, it is very likely they will be converted to float so that 928 // they can be stored in an `ndarray`. These warnings apply similarly to 929 // `Series` since it internally leverages `ndarray`. 930 // 931 // Parameters 932 // ---------- 933 // arg : scalar, list, tuple, 1-d array, or Series 934 // 935 // Argument to be converted. 936 // 937 // errors : {'ignore', 'raise', 'coerce'}, default 'raise' 938 // 939 // - If 'raise', then invalid parsing will raise an exception. 940 // 941 // - If 'coerce', then invalid parsing will be set as NaN. 942 // 943 // - If 'ignore', then invalid parsing will return the input. 944 // 945 // .. versionchanged:: 2.2 946 // 947 // "ignore" is deprecated. Catch exceptions explicitly instead. 
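// Usage sketch for the IntervalRange binding declared above; not part of
// the generated code. py.Float (as used in llgo's own examples) builds the
// numeric bounds, so the result is a float64-backed IntervalIndex rather
// than the int64 one shown in the doctest; nil is assumed to keep periods,
// freq, name and closed at their Python defaults.
func exampleIntervalRange() *py.Object {
	// roughly pd.interval_range(start=0, end=5)
	return IntervalRange(py.Float(0), py.Float(5), nil, nil, nil, nil)
}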
948 // 949 // downcast : str, default None 950 // 951 // Can be 'integer', 'signed', 'unsigned', or 'float'. 952 // If not None, and if the data has been successfully cast to a 953 // numerical dtype (or if the data was numeric to begin with), 954 // downcast that resulting data to the smallest numerical dtype 955 // possible according to the following rules: 956 // 957 // - 'integer' or 'signed': smallest signed int dtype (min.: np.int8) 958 // - 'unsigned': smallest unsigned int dtype (min.: np.uint8) 959 // - 'float': smallest float dtype (min.: np.float32) 960 // 961 // As this behaviour is separate from the core conversion to 962 // numeric values, any errors raised during the downcasting 963 // will be surfaced regardless of the value of the 'errors' input. 964 // 965 // In addition, downcasting will only occur if the size 966 // of the resulting data's dtype is strictly larger than 967 // the dtype it is to be cast to, so if none of the dtypes 968 // checked satisfy that specification, no downcasting will be 969 // performed on the data. 970 // 971 // dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable' 972 // 973 // Back-end data type applied to the resultant :class:`DataFrame` 974 // (still experimental). Behaviour is as follows: 975 // 976 // * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` 977 // (default). 978 // * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` 979 // DataFrame. 980 // 981 // .. versionadded:: 2.0 982 // 983 // Returns 984 // ------- 985 // ret 986 // 987 // Numeric if parsing succeeded. 988 // Return type depends on input. Series if Series, otherwise ndarray. 989 // 990 // See Also 991 // -------- 992 // DataFrame.astype : Cast argument to a specified dtype. 993 // to_datetime : Convert argument to datetime. 994 // to_timedelta : Convert argument to timedelta. 995 // numpy.ndarray.astype : Cast a numpy array to a specified type. 996 // DataFrame.convert_dtypes : Convert dtypes. 997 // 998 // Examples 999 // -------- 1000 // Take separate series and convert to numeric, coercing when told to 1001 // 1002 // >>> s = pd.Series(['1.0', '2', -3]) 1003 // >>> pd.to_numeric(s) 1004 // 0 1.0 1005 // 1 2.0 1006 // 2 -3.0 1007 // dtype: float64 1008 // >>> pd.to_numeric(s, downcast='float') 1009 // 0 1.0 1010 // 1 2.0 1011 // 2 -3.0 1012 // dtype: float32 1013 // >>> pd.to_numeric(s, downcast='signed') 1014 // 0 1 1015 // 1 2 1016 // 2 -3 1017 // dtype: int8 1018 // >>> s = pd.Series(['apple', '1.0', '2', -3]) 1019 // >>> pd.to_numeric(s, errors='coerce') 1020 // 0 NaN 1021 // 1 1.0 1022 // 2 2.0 1023 // 3 -3.0 1024 // dtype: float64 1025 // 1026 // Downcasting of nullable integer and floating dtypes is supported: 1027 // 1028 // >>> s = pd.Series([1, 2, 3], dtype="Int64") 1029 // >>> pd.to_numeric(s, downcast="integer") 1030 // 0 1 1031 // 1 2 1032 // 2 3 1033 // dtype: Int8 1034 // >>> s = pd.Series([1.0, 2.1, 3.0], dtype="Float64") 1035 // >>> pd.to_numeric(s, downcast="float") 1036 // 0 1.0 1037 // 1 2.1 1038 // 2 3.0 1039 // dtype: Float32 1040 // 1041 //go:linkname ToNumeric py.to_numeric 1042 func ToNumeric(arg *py.Object, errors *py.Object, downcast *py.Object, dtypeBackend *py.Object) *py.Object 1043 1044 // Convert argument to datetime. 1045 // 1046 // This function converts a scalar, array-like, :class:`Series` or 1047 // :class:`DataFrame`/dict-like to a pandas datetime object. 
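// Usage sketch for the ToNumeric binding declared above; not part of the
// generated code. arg is typically a Python list or Series built elsewhere;
// errors and downcast are Python str objects such as 'coerce' or 'float',
// and nil is assumed to mean the Python default ('raise', no downcast,
// default dtype backend).
func exampleToNumeric(series, coerce *py.Object) *py.Object {
	// pd.to_numeric(series, errors='coerce')
	return ToNumeric(series, coerce, nil, nil)
}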
1048 // 1049 // Parameters 1050 // ---------- 1051 // arg : int, float, str, datetime, list, tuple, 1-d array, Series, DataFrame/dict-like 1052 // 1053 // The object to convert to a datetime. If a :class:`DataFrame` is provided, the 1054 // method expects minimally the following columns: :const:`"year"`, 1055 // :const:`"month"`, :const:`"day"`. The column "year" 1056 // must be specified in 4-digit format. 1057 // 1058 // errors : {'ignore', 'raise', 'coerce'}, default 'raise' 1059 // - If :const:`'raise'`, then invalid parsing will raise an exception. 1060 // - If :const:`'coerce'`, then invalid parsing will be set as :const:`NaT`. 1061 // - If :const:`'ignore'`, then invalid parsing will return the input. 1062 // 1063 // dayfirst : bool, default False 1064 // 1065 // Specify a date parse order if `arg` is str or is list-like. 1066 // If :const:`True`, parses dates with the day first, e.g. :const:`"10/11/12"` 1067 // is parsed as :const:`2012-11-10`. 1068 // 1069 // .. warning:: 1070 // 1071 // ``dayfirst=True`` is not strict, but will prefer to parse 1072 // with day first. 1073 // 1074 // yearfirst : bool, default False 1075 // 1076 // Specify a date parse order if `arg` is str or is list-like. 1077 // 1078 // - If :const:`True` parses dates with the year first, e.g. 1079 // :const:`"10/11/12"` is parsed as :const:`2010-11-12`. 1080 // - If both `dayfirst` and `yearfirst` are :const:`True`, `yearfirst` is 1081 // preceded (same as :mod:`dateutil`). 1082 // 1083 // .. warning:: 1084 // 1085 // ``yearfirst=True`` is not strict, but will prefer to parse 1086 // with year first. 1087 // 1088 // utc : bool, default False 1089 // 1090 // Control timezone-related parsing, localization and conversion. 1091 // 1092 // - If :const:`True`, the function *always* returns a timezone-aware 1093 // UTC-localized :class:`Timestamp`, :class:`Series` or 1094 // :class:`DatetimeIndex`. To do this, timezone-naive inputs are 1095 // *localized* as UTC, while timezone-aware inputs are *converted* to UTC. 1096 // 1097 // - If :const:`False` (default), inputs will not be coerced to UTC. 1098 // Timezone-naive inputs will remain naive, while timezone-aware ones 1099 // will keep their time offsets. Limitations exist for mixed 1100 // offsets (typically, daylight savings), see :ref:`Examples 1101 // <to_datetime_tz_examples>` section for details. 1102 // 1103 // .. warning:: 1104 // 1105 // In a future version of pandas, parsing datetimes with mixed time 1106 // zones will raise an error unless `utc=True`. 1107 // Please specify `utc=True` to opt in to the new behaviour 1108 // and silence this warning. To create a `Series` with mixed offsets and 1109 // `object` dtype, please use `apply` and `datetime.datetime.strptime`. 1110 // 1111 // See also: pandas general documentation about `timezone conversion and 1112 // localization 1113 // <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html 1114 // #time-zone-handling>`_. 1115 // 1116 // format : str, default None 1117 // 1118 // The strftime to parse time, e.g. :const:`"%d/%m/%Y"`. See 1119 // `strftime documentation 1120 // <https://docs.python.org/3/library/datetime.html 1121 // #strftime-and-strptime-behavior>`_ for more information on choices, though 1122 // note that :const:`"%f"` will parse all the way up to nanoseconds. 
1123 // You can also pass: 1124 // 1125 // - "ISO8601", to parse any `ISO8601 <https://en.wikipedia.org/wiki/ISO_8601>`_ 1126 // time string (not necessarily in exactly the same format); 1127 // - "mixed", to infer the format for each element individually. This is risky, 1128 // and you should probably use it along with `dayfirst`. 1129 // 1130 // .. note:: 1131 // 1132 // If a :class:`DataFrame` is passed, then `format` has no effect. 1133 // 1134 // exact : bool, default True 1135 // 1136 // Control how `format` is used: 1137 // 1138 // - If :const:`True`, require an exact `format` match. 1139 // - If :const:`False`, allow the `format` to match anywhere in the target 1140 // string. 1141 // 1142 // Cannot be used alongside ``format='ISO8601'`` or ``format='mixed'``. 1143 // 1144 // unit : str, default 'ns' 1145 // 1146 // The unit of the arg (D,s,ms,us,ns) denote the unit, which is an 1147 // integer or float number. This will be based off the origin. 1148 // Example, with ``unit='ms'`` and ``origin='unix'``, this would calculate 1149 // the number of milliseconds to the unix epoch start. 1150 // 1151 // infer_datetime_format : bool, default False 1152 // 1153 // If :const:`True` and no `format` is given, attempt to infer the format 1154 // of the datetime strings based on the first non-NaN element, 1155 // and if it can be inferred, switch to a faster method of parsing them. 1156 // In some cases this can increase the parsing speed by ~5-10x. 1157 // 1158 // .. deprecated:: 2.0.0 1159 // A strict version of this argument is now the default, passing it has 1160 // no effect. 1161 // 1162 // origin : scalar, default 'unix' 1163 // 1164 // Define the reference date. The numeric values would be parsed as number 1165 // of units (defined by `unit`) since this reference date. 1166 // 1167 // - If :const:`'unix'` (or POSIX) time; origin is set to 1970-01-01. 1168 // - If :const:`'julian'`, unit must be :const:`'D'`, and origin is set to 1169 // beginning of Julian Calendar. Julian day number :const:`0` is assigned 1170 // to the day starting at noon on January 1, 4713 BC. 1171 // - If Timestamp convertible (Timestamp, dt.datetime, np.datetimt64 or date 1172 // string), origin is set to Timestamp identified by origin. 1173 // - If a float or integer, origin is the difference 1174 // (in units determined by the ``unit`` argument) relative to 1970-01-01. 1175 // 1176 // cache : bool, default True 1177 // 1178 // If :const:`True`, use a cache of unique, converted dates to apply the 1179 // datetime conversion. May produce significant speed-up when parsing 1180 // duplicate date strings, especially ones with timezone offsets. The cache 1181 // is only used when there are at least 50 values. The presence of 1182 // out-of-bounds values will render the cache unusable and may slow down 1183 // parsing. 1184 // 1185 // Returns 1186 // ------- 1187 // datetime 1188 // 1189 // If parsing succeeded. 
1190 // Return type depends on input (types in parenthesis correspond to 1191 // fallback in case of unsuccessful timezone or out-of-range timestamp 1192 // parsing): 1193 // 1194 // - scalar: :class:`Timestamp` (or :class:`datetime.datetime`) 1195 // - array-like: :class:`DatetimeIndex` (or :class:`Series` with 1196 // :class:`object` dtype containing :class:`datetime.datetime`) 1197 // - Series: :class:`Series` of :class:`datetime64` dtype (or 1198 // :class:`Series` of :class:`object` dtype containing 1199 // :class:`datetime.datetime`) 1200 // - DataFrame: :class:`Series` of :class:`datetime64` dtype (or 1201 // :class:`Series` of :class:`object` dtype containing 1202 // :class:`datetime.datetime`) 1203 // 1204 // Raises 1205 // ------ 1206 // ParserError 1207 // 1208 // When parsing a date from string fails. 1209 // 1210 // ValueError 1211 // 1212 // When another datetime conversion error happens. For example when one 1213 // of 'year', 'month', day' columns is missing in a :class:`DataFrame`, or 1214 // when a Timezone-aware :class:`datetime.datetime` is found in an array-like 1215 // of mixed time offsets, and ``utc=False``. 1216 // 1217 // See Also 1218 // -------- 1219 // DataFrame.astype : Cast argument to a specified dtype. 1220 // to_timedelta : Convert argument to timedelta. 1221 // convert_dtypes : Convert dtypes. 1222 // 1223 // Notes 1224 // ----- 1225 // 1226 // Many input types are supported, and lead to different output types: 1227 // 1228 // - **scalars** can be int, float, str, datetime object (from stdlib :mod:`datetime` 1229 // module or :mod:`numpy`). They are converted to :class:`Timestamp` when 1230 // possible, otherwise they are converted to :class:`datetime.datetime`. 1231 // None/NaN/null scalars are converted to :const:`NaT`. 1232 // 1233 // - **array-like** can contain int, float, str, datetime objects. They are 1234 // converted to :class:`DatetimeIndex` when possible, otherwise they are 1235 // converted to :class:`Index` with :class:`object` dtype, containing 1236 // :class:`datetime.datetime`. None/NaN/null entries are converted to 1237 // :const:`NaT` in both cases. 1238 // 1239 // - **Series** are converted to :class:`Series` with :class:`datetime64` 1240 // dtype when possible, otherwise they are converted to :class:`Series` with 1241 // :class:`object` dtype, containing :class:`datetime.datetime`. None/NaN/null 1242 // entries are converted to :const:`NaT` in both cases. 1243 // 1244 // - **DataFrame/dict-like** are converted to :class:`Series` with 1245 // :class:`datetime64` dtype. For each row a datetime is created from assembling 1246 // the various dataframe columns. Column keys can be common abbreviations 1247 // like ['year', 'month', 'day', 'minute', 'second', 'ms', 'us', 'ns']) or 1248 // plurals of the same. 1249 // 1250 // The following causes are responsible for :class:`datetime.datetime` objects 1251 // being returned (possibly inside an :class:`Index` or a :class:`Series` with 1252 // :class:`object` dtype) instead of a proper pandas designated type 1253 // (:class:`Timestamp`, :class:`DatetimeIndex` or :class:`Series` 1254 // with :class:`datetime64` dtype): 1255 // 1256 // - when any input element is before :const:`Timestamp.min` or after 1257 // :const:`Timestamp.max`, see `timestamp limitations 1258 // <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html 1259 // #timeseries-timestamp-limits>`_. 
1260 // 1261 // - when “utc=False“ (default) and the input is an array-like or 1262 // :class:`Series` containing mixed naive/aware datetime, or aware with mixed 1263 // time offsets. Note that this happens in the (quite frequent) situation when 1264 // the timezone has a daylight savings policy. In that case you may wish to 1265 // use “utc=True“. 1266 // 1267 // Examples 1268 // -------- 1269 // 1270 // **Handling various input formats** 1271 // 1272 // Assembling a datetime from multiple columns of a :class:`DataFrame`. The keys 1273 // can be common abbreviations like ['year', 'month', 'day', 'minute', 'second', 1274 // 'ms', 'us', 'ns']) or plurals of the same 1275 // 1276 // >>> df = pd.DataFrame({'year': [2015, 2016], 1277 // ... 'month': [2, 3], 1278 // ... 'day': [4, 5]}) 1279 // >>> pd.to_datetime(df) 1280 // 0 2015-02-04 1281 // 1 2016-03-05 1282 // dtype: datetime64[ns] 1283 // 1284 // # Using a unix epoch time 1285 // 1286 // >>> pd.to_datetime(1490195805, unit='s') 1287 // Timestamp('2017-03-22 15:16:45') 1288 // >>> pd.to_datetime(1490195805433502912, unit='ns') 1289 // Timestamp('2017-03-22 15:16:45.433502912') 1290 // 1291 // .. warning:: For float arg, precision rounding might happen. To prevent 1292 // 1293 // unexpected behavior use a fixed-width exact type. 1294 // 1295 // # Using a non-unix epoch origin 1296 // 1297 // >>> pd.to_datetime([1, 2, 3], unit='D', 1298 // ... origin=pd.Timestamp('1960-01-01')) 1299 // DatetimeIndex(['1960-01-02', '1960-01-03', '1960-01-04'], 1300 // 1301 // dtype='datetime64[ns]', freq=None) 1302 // 1303 // **Differences with strptime behavior** 1304 // 1305 // :const:`"%f"` will parse all the way up to nanoseconds. 1306 // 1307 // >>> pd.to_datetime('2018-10-26 12:00:00.0000000011', 1308 // ... format='%Y-%m-%d %H:%M:%S.%f') 1309 // Timestamp('2018-10-26 12:00:00.000000001') 1310 // 1311 // **Non-convertible date/times** 1312 // 1313 // Passing “errors='coerce'“ will force an out-of-bounds date to :const:`NaT`, 1314 // in addition to forcing non-dates (or non-parseable dates) to :const:`NaT`. 1315 // 1316 // >>> pd.to_datetime('13000101', format='%Y%m%d', errors='coerce') 1317 // NaT 1318 // 1319 // .. _to_datetime_tz_examples: 1320 // 1321 // **Timezones and time offsets** 1322 // 1323 // The default behaviour (“utc=False“) is as follows: 1324 // 1325 // - Timezone-naive inputs are converted to timezone-naive :class:`DatetimeIndex`: 1326 // 1327 // >>> pd.to_datetime(['2018-10-26 12:00:00', '2018-10-26 13:00:15']) 1328 // DatetimeIndex(['2018-10-26 12:00:00', '2018-10-26 13:00:15'], 1329 // 1330 // dtype='datetime64[ns]', freq=None) 1331 // 1332 // - Timezone-aware inputs *with constant time offset* are converted to 1333 // timezone-aware :class:`DatetimeIndex`: 1334 // 1335 // >>> pd.to_datetime(['2018-10-26 12:00 -0500', '2018-10-26 13:00 -0500']) 1336 // DatetimeIndex(['2018-10-26 12:00:00-05:00', '2018-10-26 13:00:00-05:00'], 1337 // 1338 // dtype='datetime64[ns, UTC-05:00]', freq=None) 1339 // 1340 // - However, timezone-aware inputs *with mixed time offsets* (for example 1341 // issued from a timezone with daylight savings, such as Europe/Paris) 1342 // are **not successfully converted** to a :class:`DatetimeIndex`. 1343 // Parsing datetimes with mixed time zones will show a warning unless 1344 // `utc=True`. 
If you specify `utc=False` the warning below will be shown 1345 // and a simple :class:`Index` containing :class:`datetime.datetime` 1346 // objects will be returned: 1347 // 1348 // >>> pd.to_datetime(['2020-10-25 02:00 +0200', 1349 // ... '2020-10-25 04:00 +0100']) # doctest: +SKIP 1350 // FutureWarning: In a future version of pandas, parsing datetimes with mixed 1351 // time zones will raise an error unless `utc=True`. Please specify `utc=True` 1352 // to opt in to the new behaviour and silence this warning. To create a `Series` 1353 // with mixed offsets and `object` dtype, please use `apply` and 1354 // `datetime.datetime.strptime`. 1355 // Index([2020-10-25 02:00:00+02:00, 2020-10-25 04:00:00+01:00], 1356 // 1357 // dtype='object') 1358 // 1359 // - A mix of timezone-aware and timezone-naive inputs is also converted to 1360 // a simple :class:`Index` containing :class:`datetime.datetime` objects: 1361 // 1362 // >>> from datetime import datetime 1363 // >>> pd.to_datetime(["2020-01-01 01:00:00-01:00", 1364 // ... datetime(2020, 1, 1, 3, 0)]) # doctest: +SKIP 1365 // FutureWarning: In a future version of pandas, parsing datetimes with mixed 1366 // time zones will raise an error unless `utc=True`. Please specify `utc=True` 1367 // to opt in to the new behaviour and silence this warning. To create a `Series` 1368 // with mixed offsets and `object` dtype, please use `apply` and 1369 // `datetime.datetime.strptime`. 1370 // Index([2020-01-01 01:00:00-01:00, 2020-01-01 03:00:00], dtype='object') 1371 // 1372 // | 1373 // 1374 // Setting “utc=True“ solves most of the above issues: 1375 // 1376 // - Timezone-naive inputs are *localized* as UTC 1377 // 1378 // >>> pd.to_datetime(['2018-10-26 12:00', '2018-10-26 13:00'], utc=True) 1379 // DatetimeIndex(['2018-10-26 12:00:00+00:00', '2018-10-26 13:00:00+00:00'], 1380 // 1381 // dtype='datetime64[ns, UTC]', freq=None) 1382 // 1383 // - Timezone-aware inputs are *converted* to UTC (the output represents the 1384 // exact same datetime, but viewed from the UTC time offset `+00:00`). 1385 // 1386 // >>> pd.to_datetime(['2018-10-26 12:00 -0530', '2018-10-26 12:00 -0500'], 1387 // ... utc=True) 1388 // DatetimeIndex(['2018-10-26 17:30:00+00:00', '2018-10-26 17:00:00+00:00'], 1389 // 1390 // dtype='datetime64[ns, UTC]', freq=None) 1391 // 1392 // - Inputs can contain both string or datetime, the above 1393 // rules still apply 1394 // 1395 // >>> pd.to_datetime(['2018-10-26 12:00', datetime(2020, 1, 1, 18)], utc=True) 1396 // DatetimeIndex(['2018-10-26 12:00:00+00:00', '2020-01-01 18:00:00+00:00'], 1397 // 1398 // dtype='datetime64[ns, UTC]', freq=None) 1399 // 1400 //go:linkname ToDatetime py.to_datetime 1401 func ToDatetime(arg *py.Object, errors *py.Object, dayfirst *py.Object, yearfirst *py.Object, utc *py.Object, format *py.Object, exact *py.Object, unit *py.Object, inferDatetimeFormat *py.Object, origin *py.Object, cache *py.Object) *py.Object 1402 1403 // Convert argument to timedelta. 1404 // 1405 // Timedeltas are absolute differences in times, expressed in difference 1406 // units (e.g. days, hours, minutes, seconds). This method converts 1407 // an argument from a recognized timedelta format / value into 1408 // a Timedelta type. 1409 // 1410 // Parameters 1411 // ---------- 1412 // arg : str, timedelta, list-like or Series 1413 // 1414 // The data to be converted to timedelta. 1415 // 1416 // .. 
versionchanged:: 2.0 1417 // Strings with units 'M', 'Y' and 'y' do not represent 1418 // unambiguous timedelta values and will raise an exception. 1419 // 1420 // unit : str, optional 1421 // 1422 // Denotes the unit of the arg for numeric `arg`. Defaults to ``"ns"``. 1423 // 1424 // Possible values: 1425 // 1426 // * 'W' 1427 // * 'D' / 'days' / 'day' 1428 // * 'hours' / 'hour' / 'hr' / 'h' / 'H' 1429 // * 'm' / 'minute' / 'min' / 'minutes' / 'T' 1430 // * 's' / 'seconds' / 'sec' / 'second' / 'S' 1431 // * 'ms' / 'milliseconds' / 'millisecond' / 'milli' / 'millis' / 'L' 1432 // * 'us' / 'microseconds' / 'microsecond' / 'micro' / 'micros' / 'U' 1433 // * 'ns' / 'nanoseconds' / 'nano' / 'nanos' / 'nanosecond' / 'N' 1434 // 1435 // Must not be specified when `arg` contains strings and ``errors="raise"``. 1436 // 1437 // .. deprecated:: 2.2.0 1438 // Units 'H', 'T', 'S', 'L', 'U' and 'N' are deprecated and will be removed 1439 // in a future version. Please use 'h', 'min', 's', 'ms', 'us', and 'ns' 1440 // instead of 'H', 'T', 'S', 'L', 'U' and 'N'. 1441 // 1442 // errors : {'ignore', 'raise', 'coerce'}, default 'raise' 1443 // - If 'raise', then invalid parsing will raise an exception. 1444 // - If 'coerce', then invalid parsing will be set as NaT. 1445 // - If 'ignore', then invalid parsing will return the input. 1446 // 1447 // Returns 1448 // ------- 1449 // timedelta 1450 // 1451 // If parsing succeeded. 1452 // Return type depends on input: 1453 // 1454 // - list-like: TimedeltaIndex of timedelta64 dtype 1455 // - Series: Series of timedelta64 dtype 1456 // - scalar: Timedelta 1457 // 1458 // See Also 1459 // -------- 1460 // DataFrame.astype : Cast argument to a specified dtype. 1461 // to_datetime : Convert argument to datetime. 1462 // convert_dtypes : Convert dtypes. 1463 // 1464 // Notes 1465 // ----- 1466 // If the precision is higher than nanoseconds, the precision of the duration is 1467 // truncated to nanoseconds for string inputs. 1468 // 1469 // Examples 1470 // -------- 1471 // Parsing a single string to a Timedelta: 1472 // 1473 // >>> pd.to_timedelta('1 days 06:05:01.00003') 1474 // Timedelta('1 days 06:05:01.000030') 1475 // >>> pd.to_timedelta('15.5us') 1476 // Timedelta('0 days 00:00:00.000015500') 1477 // 1478 // Parsing a list or array of strings: 1479 // 1480 // >>> pd.to_timedelta(['1 days 06:05:01.00003', '15.5us', 'nan']) 1481 // TimedeltaIndex(['1 days 06:05:01.000030', '0 days 00:00:00.000015500', NaT], 1482 // 1483 // dtype='timedelta64[ns]', freq=None) 1484 // 1485 // Converting numbers by specifying the `unit` keyword argument: 1486 // 1487 // >>> pd.to_timedelta(np.arange(5), unit='s') 1488 // TimedeltaIndex(['0 days 00:00:00', '0 days 00:00:01', '0 days 00:00:02', 1489 // 1490 // '0 days 00:00:03', '0 days 00:00:04'], 1491 // dtype='timedelta64[ns]', freq=None) 1492 // 1493 // >>> pd.to_timedelta(np.arange(5), unit='d') 1494 // TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'], 1495 // 1496 // dtype='timedelta64[ns]', freq=None) 1497 // 1498 //go:linkname ToTimedelta py.to_timedelta 1499 func ToTimedelta(arg *py.Object, unit *py.Object, errors *py.Object) *py.Object 1500 1501 // Encode the object as an enumerated type or categorical variable. 1502 // 1503 // This method is useful for obtaining a numeric representation of an 1504 // array when all that matters is identifying distinct values. 
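// Usage sketches for the ToDatetime and ToTimedelta bindings declared
// above; not part of the generated code. ToDatetime flattens its keywords
// to (arg, errors, dayfirst, yearfirst, utc, format, exact, unit,
// inferDatetimeFormat, origin, cache). unitS is a Python str such as 's'
// built by the caller; nil is assumed to leave a keyword at its Python
// default, which this file does not document.
func exampleToDatetime(epoch, unitS *py.Object) *py.Object {
	// pd.to_datetime(1490195805, unit='s')
	return ToDatetime(epoch, nil, nil, nil, nil, nil, nil, unitS, nil, nil, nil)
}

func exampleToTimedelta(text *py.Object) *py.Object {
	// pd.to_timedelta('1 days 06:05:01.00003')
	return ToTimedelta(text, nil, nil)
}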
`factorize` 1505 // is available as both a top-level function :func:`pandas.factorize`, 1506 // and as a method :meth:`Series.factorize` and :meth:`Index.factorize`. 1507 // 1508 // Parameters 1509 // ---------- 1510 // values : sequence 1511 // 1512 // A 1-D sequence. Sequences that aren't pandas objects are 1513 // coerced to ndarrays before factorization. 1514 // 1515 // sort : bool, default False 1516 // 1517 // Sort `uniques` and shuffle `codes` to maintain the 1518 // relationship. 1519 // 1520 // use_na_sentinel : bool, default True 1521 // 1522 // If True, the sentinel -1 will be used for NaN values. If False, 1523 // NaN values will be encoded as non-negative integers and will not drop the 1524 // NaN from the uniques of the values. 1525 // 1526 // .. versionadded:: 1.5.0 1527 // 1528 // size_hint : int, optional 1529 // 1530 // Hint to the hashtable sizer. 1531 // 1532 // Returns 1533 // ------- 1534 // codes : ndarray 1535 // 1536 // An integer ndarray that's an indexer into `uniques`. 1537 // ``uniques.take(codes)`` will have the same values as `values`. 1538 // 1539 // uniques : ndarray, Index, or Categorical 1540 // 1541 // The unique valid values. When `values` is Categorical, `uniques` 1542 // is a Categorical. When `values` is some other pandas object, an 1543 // `Index` is returned. Otherwise, a 1-D ndarray is returned. 1544 // 1545 // .. note:: 1546 // 1547 // Even if there's a missing value in `values`, `uniques` will 1548 // *not* contain an entry for it. 1549 // 1550 // See Also 1551 // -------- 1552 // cut : Discretize continuous-valued array. 1553 // unique : Find the unique value in an array. 1554 // 1555 // Notes 1556 // ----- 1557 // Reference :ref:`the user guide <reshaping.factorize>` for more examples. 1558 // 1559 // Examples 1560 // -------- 1561 // These examples all show factorize as a top-level method like 1562 // “pd.factorize(values)“. The results are identical for methods like 1563 // :meth:`Series.factorize`. 1564 // 1565 // >>> codes, uniques = pd.factorize(np.array(['b', 'b', 'a', 'c', 'b'], dtype="O")) 1566 // >>> codes 1567 // array([0, 0, 1, 2, 0]) 1568 // >>> uniques 1569 // array(['b', 'a', 'c'], dtype=object) 1570 // 1571 // With “sort=True“, the `uniques` will be sorted, and `codes` will be 1572 // shuffled so that the relationship is the maintained. 1573 // 1574 // >>> codes, uniques = pd.factorize(np.array(['b', 'b', 'a', 'c', 'b'], dtype="O"), 1575 // ... sort=True) 1576 // >>> codes 1577 // array([1, 1, 0, 2, 1]) 1578 // >>> uniques 1579 // array(['a', 'b', 'c'], dtype=object) 1580 // 1581 // When “use_na_sentinel=True“ (the default), missing values are indicated in 1582 // the `codes` with the sentinel value “-1“ and missing values are not 1583 // included in `uniques`. 1584 // 1585 // >>> codes, uniques = pd.factorize(np.array(['b', None, 'a', 'c', 'b'], dtype="O")) 1586 // >>> codes 1587 // array([ 0, -1, 1, 2, 0]) 1588 // >>> uniques 1589 // array(['b', 'a', 'c'], dtype=object) 1590 // 1591 // Thus far, we've only factorized lists (which are internally coerced to 1592 // NumPy arrays). When factorizing pandas objects, the type of `uniques` 1593 // will differ. For Categoricals, a `Categorical` is returned. 
1594 // 1595 // >>> cat = pd.Categorical(['a', 'a', 'c'], categories=['a', 'b', 'c']) 1596 // >>> codes, uniques = pd.factorize(cat) 1597 // >>> codes 1598 // array([0, 0, 1]) 1599 // >>> uniques 1600 // ['a', 'c'] 1601 // Categories (3, object): ['a', 'b', 'c'] 1602 // 1603 // Notice that “'b'“ is in “uniques.categories“, despite not being 1604 // present in “cat.values“. 1605 // 1606 // For all other pandas objects, an Index of the appropriate type is 1607 // returned. 1608 // 1609 // >>> cat = pd.Series(['a', 'a', 'c']) 1610 // >>> codes, uniques = pd.factorize(cat) 1611 // >>> codes 1612 // array([0, 0, 1]) 1613 // >>> uniques 1614 // Index(['a', 'c'], dtype='object') 1615 // 1616 // If NaN is in the values, and we want to include NaN in the uniques of the 1617 // values, it can be achieved by setting “use_na_sentinel=False“. 1618 // 1619 // >>> values = np.array([1, 2, 1, np.nan]) 1620 // >>> codes, uniques = pd.factorize(values) # default: use_na_sentinel=True 1621 // >>> codes 1622 // array([ 0, 1, 0, -1]) 1623 // >>> uniques 1624 // array([1., 2.]) 1625 // 1626 // >>> codes, uniques = pd.factorize(values, use_na_sentinel=False) 1627 // >>> codes 1628 // array([0, 1, 0, 2]) 1629 // >>> uniques 1630 // array([ 1., 2., nan]) 1631 // 1632 //go:linkname Factorize py.factorize 1633 func Factorize(values *py.Object, sort *py.Object, useNaSentinel *py.Object, sizeHint *py.Object) *py.Object 1634 1635 // Return unique values based on a hash table. 1636 // 1637 // Uniques are returned in order of appearance. This does NOT sort. 1638 // 1639 // Significantly faster than numpy.unique for long enough sequences. 1640 // Includes NA values. 1641 // 1642 // Parameters 1643 // ---------- 1644 // values : 1d array-like 1645 // 1646 // Returns 1647 // ------- 1648 // numpy.ndarray or ExtensionArray 1649 // 1650 // The return can be: 1651 // 1652 // * Index : when the input is an Index 1653 // * Categorical : when the input is a Categorical dtype 1654 // * ndarray : when the input is a Series/ndarray 1655 // 1656 // Return numpy.ndarray or ExtensionArray. 1657 // 1658 // See Also 1659 // -------- 1660 // Index.unique : Return unique values from an Index. 1661 // Series.unique : Return unique values of Series object. 1662 // 1663 // Examples 1664 // -------- 1665 // >>> pd.unique(pd.Series([2, 1, 3, 3])) 1666 // array([2, 1, 3]) 1667 // 1668 // >>> pd.unique(pd.Series([2] + [1] * 5)) 1669 // array([2, 1]) 1670 // 1671 // >>> pd.unique(pd.Series([pd.Timestamp("20160101"), pd.Timestamp("20160101")])) 1672 // array(['2016-01-01T00:00:00.000000000'], dtype='datetime64[ns]') 1673 // 1674 // >>> pd.unique( 1675 // ... pd.Series( 1676 // ... [ 1677 // ... pd.Timestamp("20160101", tz="US/Eastern"), 1678 // ... pd.Timestamp("20160101", tz="US/Eastern"), 1679 // ... ] 1680 // ... ) 1681 // ... ) 1682 // <DatetimeArray> 1683 // ['2016-01-01 00:00:00-05:00'] 1684 // Length: 1, dtype: datetime64[ns, US/Eastern] 1685 // 1686 // >>> pd.unique( 1687 // ... pd.Index( 1688 // ... [ 1689 // ... pd.Timestamp("20160101", tz="US/Eastern"), 1690 // ... pd.Timestamp("20160101", tz="US/Eastern"), 1691 // ... ] 1692 // ... ) 1693 // ... ) 1694 // DatetimeIndex(['2016-01-01 00:00:00-05:00'], 1695 // 1696 // dtype='datetime64[ns, US/Eastern]', 1697 // freq=None) 1698 // 1699 // >>> pd.unique(np.array(list("baabc"), dtype="O")) 1700 // array(['b', 'a', 'c'], dtype=object) 1701 // 1702 // An unordered Categorical will return categories in the 1703 // order of appearance. 
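//
// A corresponding Go-side sketch for this binding, under the same assumptions
// as the Factorize sketch above (`values` is a *py.Object built elsewhere):
//
//     // u wraps the ndarray / Index / Categorical that pd.unique returns.
//     u := pandas.Unique(values)
//     _ = u
//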
1704 // 1705 // >>> pd.unique(pd.Series(pd.Categorical(list("baabc")))) 1706 // ['b', 'a', 'c'] 1707 // Categories (3, object): ['a', 'b', 'c'] 1708 // 1709 // >>> pd.unique(pd.Series(pd.Categorical(list("baabc"), categories=list("abc")))) 1710 // ['b', 'a', 'c'] 1711 // Categories (3, object): ['a', 'b', 'c'] 1712 // 1713 // An ordered Categorical preserves the category ordering. 1714 // 1715 // >>> pd.unique( 1716 // ... pd.Series( 1717 // ... pd.Categorical(list("baabc"), categories=list("abc"), ordered=True) 1718 // ... ) 1719 // ... ) 1720 // ['b', 'a', 'c'] 1721 // Categories (3, object): ['a' < 'b' < 'c'] 1722 // 1723 // # An array of tuples 1724 // 1725 // >>> pd.unique(pd.Series([("a", "b"), ("b", "a"), ("a", "c"), ("b", "a")]).values) 1726 // array([('a', 'b'), ('b', 'a'), ('a', 'c')], dtype=object) 1727 // 1728 //go:linkname Unique py.unique 1729 func Unique(values *py.Object) *py.Object 1730 1731 // Compute a histogram of the counts of non-null values. 1732 // 1733 // Parameters 1734 // ---------- 1735 // values : ndarray (1-d) 1736 // sort : bool, default True 1737 // 1738 // Sort by values 1739 // 1740 // ascending : bool, default False 1741 // 1742 // Sort in ascending order 1743 // 1744 // normalize: bool, default False 1745 // 1746 // If True then compute a relative histogram 1747 // 1748 // bins : integer, optional 1749 // 1750 // Rather than count values, group them into half-open bins, 1751 // convenience for pd.cut, only works with numeric data 1752 // 1753 // dropna : bool, default True 1754 // 1755 // Don't include counts of NaN 1756 // 1757 // Returns 1758 // ------- 1759 // Series 1760 // 1761 //go:linkname ValueCounts py.value_counts 1762 func ValueCounts(values *py.Object, sort *py.Object, ascending *py.Object, normalize *py.Object, bins *py.Object, dropna *py.Object) *py.Object 1763 1764 // Create an array. 1765 // 1766 // Parameters 1767 // ---------- 1768 // data : Sequence of objects 1769 // 1770 // The scalars inside `data` should be instances of the 1771 // scalar type for `dtype`. It's expected that `data` 1772 // represents a 1-dimensional array of data. 1773 // 1774 // When `data` is an Index or Series, the underlying array 1775 // will be extracted from `data`. 1776 // 1777 // dtype : str, np.dtype, or ExtensionDtype, optional 1778 // 1779 // The dtype to use for the array. This may be a NumPy 1780 // dtype or an extension type registered with pandas using 1781 // :meth:`pandas.api.extensions.register_extension_dtype`. 1782 // 1783 // If not specified, there are two possibilities: 1784 // 1785 // 1. When `data` is a :class:`Series`, :class:`Index`, or 1786 // :class:`ExtensionArray`, the `dtype` will be taken 1787 // from the data. 1788 // 2. Otherwise, pandas will attempt to infer the `dtype` 1789 // from the data. 1790 // 1791 // Note that when `data` is a NumPy array, ``data.dtype`` is 1792 // *not* used for inferring the array type. This is because 1793 // NumPy cannot represent all the types of data that can be 1794 // held in extension arrays. 
1795 // 1796 // Currently, pandas will infer an extension dtype for sequences of 1797 // 1798 // ============================== ======================================= 1799 // Scalar Type Array Type 1800 // ============================== ======================================= 1801 // :class:`pandas.Interval` :class:`pandas.arrays.IntervalArray` 1802 // :class:`pandas.Period` :class:`pandas.arrays.PeriodArray` 1803 // :class:`datetime.datetime` :class:`pandas.arrays.DatetimeArray` 1804 // :class:`datetime.timedelta` :class:`pandas.arrays.TimedeltaArray` 1805 // :class:`int` :class:`pandas.arrays.IntegerArray` 1806 // :class:`float` :class:`pandas.arrays.FloatingArray` 1807 // :class:`str` :class:`pandas.arrays.StringArray` or 1808 // :class:`pandas.arrays.ArrowStringArray` 1809 // :class:`bool` :class:`pandas.arrays.BooleanArray` 1810 // ============================== ======================================= 1811 // 1812 // The ExtensionArray created when the scalar type is :class:`str` is determined by 1813 // ``pd.options.mode.string_storage`` if the dtype is not explicitly given. 1814 // 1815 // For all other cases, NumPy's usual inference rules will be used. 1816 // 1817 // copy : bool, default True 1818 // 1819 // Whether to copy the data, even if not necessary. Depending 1820 // on the type of `data`, creating the new array may require 1821 // copying data, even if ``copy=False``. 1822 // 1823 // Returns 1824 // ------- 1825 // ExtensionArray 1826 // 1827 // The newly created array. 1828 // 1829 // Raises 1830 // ------ 1831 // ValueError 1832 // 1833 // When `data` is not 1-dimensional. 1834 // 1835 // See Also 1836 // -------- 1837 // numpy.array : Construct a NumPy array. 1838 // Series : Construct a pandas Series. 1839 // Index : Construct a pandas Index. 1840 // arrays.NumpyExtensionArray : ExtensionArray wrapping a NumPy array. 1841 // Series.array : Extract the array stored within a Series. 1842 // 1843 // Notes 1844 // ----- 1845 // Omitting the `dtype` argument means pandas will attempt to infer the 1846 // best array type from the values in the data. As new array types are 1847 // added by pandas and 3rd party libraries, the "best" array type may 1848 // change. We recommend specifying `dtype` to ensure that 1849 // 1850 // 1. the correct array type for the data is returned 1851 // 2. the returned array type doesn't change as new extension types 1852 // are added by pandas and third-party libraries 1853 // 1854 // Additionally, if the underlying memory representation of the returned 1855 // array matters, we recommend specifying the `dtype` as a concrete object 1856 // rather than a string alias or allowing it to be inferred. For example, 1857 // a future version of pandas or a 3rd-party library may include a 1858 // dedicated ExtensionArray for string data. In this event, the following 1859 // would no longer return a :class:`arrays.NumpyExtensionArray` backed by a 1860 // NumPy array. 1861 // 1862 // >>> pd.array(['a', 'b'], dtype=str) 1863 // <NumpyExtensionArray> 1864 // ['a', 'b'] 1865 // Length: 2, dtype: str32 1866 // 1867 // This would instead return the new ExtensionArray dedicated for string 1868 // data. If you really need the new array to be backed by a NumPy array, 1869 // specify that in the dtype. 
1870 // 1871 // >>> pd.array(['a', 'b'], dtype=np.dtype("<U1")) 1872 // <NumpyExtensionArray> 1873 // ['a', 'b'] 1874 // Length: 2, dtype: str32 1875 // 1876 // Finally, Pandas has arrays that mostly overlap with NumPy 1877 // 1878 // - :class:`arrays.DatetimeArray` 1879 // - :class:`arrays.TimedeltaArray` 1880 // 1881 // When data with a “datetime64[ns]“ or “timedelta64[ns]“ dtype is 1882 // passed, pandas will always return a “DatetimeArray“ or “TimedeltaArray“ 1883 // rather than a “NumpyExtensionArray“. This is for symmetry with the case of 1884 // timezone-aware data, which NumPy does not natively support. 1885 // 1886 // >>> pd.array(['2015', '2016'], dtype='datetime64[ns]') 1887 // <DatetimeArray> 1888 // ['2015-01-01 00:00:00', '2016-01-01 00:00:00'] 1889 // Length: 2, dtype: datetime64[ns] 1890 // 1891 // >>> pd.array(["1h", "2h"], dtype='timedelta64[ns]') 1892 // <TimedeltaArray> 1893 // ['0 days 01:00:00', '0 days 02:00:00'] 1894 // Length: 2, dtype: timedelta64[ns] 1895 // 1896 // Examples 1897 // -------- 1898 // If a dtype is not specified, pandas will infer the best dtype from the values. 1899 // See the description of `dtype` for the types pandas infers for. 1900 // 1901 // >>> pd.array([1, 2]) 1902 // <IntegerArray> 1903 // [1, 2] 1904 // Length: 2, dtype: Int64 1905 // 1906 // >>> pd.array([1, 2, np.nan]) 1907 // <IntegerArray> 1908 // [1, 2, <NA>] 1909 // Length: 3, dtype: Int64 1910 // 1911 // >>> pd.array([1.1, 2.2]) 1912 // <FloatingArray> 1913 // [1.1, 2.2] 1914 // Length: 2, dtype: Float64 1915 // 1916 // >>> pd.array(["a", None, "c"]) 1917 // <StringArray> 1918 // ['a', <NA>, 'c'] 1919 // Length: 3, dtype: string 1920 // 1921 // >>> with pd.option_context("string_storage", "pyarrow"): 1922 // ... arr = pd.array(["a", None, "c"]) 1923 // ... 1924 // >>> arr 1925 // <ArrowStringArray> 1926 // ['a', <NA>, 'c'] 1927 // Length: 3, dtype: string 1928 // 1929 // >>> pd.array([pd.Period('2000', freq="D"), pd.Period("2000", freq="D")]) 1930 // <PeriodArray> 1931 // ['2000-01-01', '2000-01-01'] 1932 // Length: 2, dtype: period[D] 1933 // 1934 // You can use the string alias for `dtype` 1935 // 1936 // >>> pd.array(['a', 'b', 'a'], dtype='category') 1937 // ['a', 'b', 'a'] 1938 // Categories (2, object): ['a', 'b'] 1939 // 1940 // # Or specify the actual dtype 1941 // 1942 // >>> pd.array(['a', 'b', 'a'], 1943 // ... dtype=pd.CategoricalDtype(['a', 'b', 'c'], ordered=True)) 1944 // ['a', 'b', 'a'] 1945 // Categories (3, object): ['a' < 'b' < 'c'] 1946 // 1947 // If pandas does not infer a dedicated extension type a 1948 // :class:`arrays.NumpyExtensionArray` is returned. 1949 // 1950 // >>> pd.array([1 + 1j, 3 + 2j]) 1951 // <NumpyExtensionArray> 1952 // [(1+1j), (3+2j)] 1953 // Length: 2, dtype: complex128 1954 // 1955 // As mentioned in the "Notes" section, new extension types may be added 1956 // in the future (by pandas or 3rd party libraries), causing the return 1957 // value to no longer be a :class:`arrays.NumpyExtensionArray`. Specify the 1958 // `dtype` as a NumPy dtype if you need to ensure there's no future change in 1959 // behavior. 1960 // 1961 // >>> pd.array([1, 2], dtype=np.dtype("int32")) 1962 // <NumpyExtensionArray> 1963 // [1, 2] 1964 // Length: 2, dtype: int32 1965 // 1966 // `data` must be 1-dimensional. A ValueError is raised when the input 1967 // has the wrong dimensionality. 1968 // 1969 // >>> pd.array(1) 1970 // Traceback (most recent call last): 1971 // 1972 // ... 1973 // 1974 // ValueError: Cannot pass scalar '1' to 'pandas.array'. 
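//
// A minimal Go-side sketch of this binding, under the same assumptions as the
// earlier sketches (`data` wraps a 1-D Python sequence built elsewhere; nil
// for dtype and copy is assumed to pick the Python-side defaults):
//
//     // arr wraps the ExtensionArray produced by pd.array.
//     arr := pandas.Array(data, nil, nil)
//     _ = arr
//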
1975 // 1976 //go:linkname Array py.array 1977 func Array(data *py.Object, dtype *py.Object, copy *py.Object) *py.Object 1978 1979 // Format float representation in DataFrame with SI notation. 1980 // 1981 // Parameters 1982 // ---------- 1983 // accuracy : int, default 3 1984 // 1985 // Number of decimal digits after the floating point. 1986 // 1987 // use_eng_prefix : bool, default False 1988 // 1989 // Whether to represent a value with SI prefixes. 1990 // 1991 // Returns 1992 // ------- 1993 // None 1994 // 1995 // Examples 1996 // -------- 1997 // >>> df = pd.DataFrame([1e-9, 1e-3, 1, 1e3, 1e6]) 1998 // >>> df 1999 // 2000 // 0 2001 // 2002 // 0 1.000000e-09 2003 // 1 1.000000e-03 2004 // 2 1.000000e+00 2005 // 3 1.000000e+03 2006 // 4 1.000000e+06 2007 // 2008 // >>> pd.set_eng_float_format(accuracy=1) 2009 // >>> df 2010 // 2011 // 0 2012 // 2013 // 0 1.0E-09 2014 // 1 1.0E-03 2015 // 2 1.0E+00 2016 // 3 1.0E+03 2017 // 4 1.0E+06 2018 // 2019 // >>> pd.set_eng_float_format(use_eng_prefix=True) 2020 // >>> df 2021 // 2022 // 0 2023 // 2024 // 0 1.000n 2025 // 1 1.000m 2026 // 2 1.000 2027 // 3 1.000k 2028 // 4 1.000M 2029 // 2030 // >>> pd.set_eng_float_format(accuracy=1, use_eng_prefix=True) 2031 // >>> df 2032 // 2033 // 0 2034 // 2035 // 0 1.0n 2036 // 1 1.0m 2037 // 2 1.0 2038 // 3 1.0k 2039 // 4 1.0M 2040 // 2041 // >>> pd.set_option("display.float_format", None) # unset option 2042 // 2043 //go:linkname SetEngFloatFormat py.set_eng_float_format 2044 func SetEngFloatFormat(accuracy *py.Object, useEngPrefix *py.Object) *py.Object 2045 2046 // Infer the most likely frequency given the input index. 2047 // 2048 // Parameters 2049 // ---------- 2050 // index : DatetimeIndex, TimedeltaIndex, Series or array-like 2051 // 2052 // If passed a Series will use the values of the series (NOT THE INDEX). 2053 // 2054 // Returns 2055 // ------- 2056 // str or None 2057 // 2058 // None if no discernible frequency. 2059 // 2060 // Raises 2061 // ------ 2062 // TypeError 2063 // 2064 // If the index is not datetime-like. 2065 // 2066 // ValueError 2067 // 2068 // If there are fewer than three values. 2069 // 2070 // Examples 2071 // -------- 2072 // >>> idx = pd.date_range(start='2020/12/01', end='2020/12/30', periods=30) 2073 // >>> pd.infer_freq(idx) 2074 // 'D' 2075 // 2076 //go:linkname InferFreq py.infer_freq 2077 func InferFreq(index *py.Object) *py.Object 2078 2079 // Concatenate pandas objects along a particular axis. 2080 // 2081 // Allows optional set logic along the other axes. 2082 // 2083 // Can also add a layer of hierarchical indexing on the concatenation axis, 2084 // which may be useful if the labels are the same (or overlapping) on 2085 // the passed axis number. 2086 // 2087 // Parameters 2088 // ---------- 2089 // objs : a sequence or mapping of Series or DataFrame objects 2090 // 2091 // If a mapping is passed, the sorted keys will be used as the `keys` 2092 // argument, unless it is passed, in which case the values will be 2093 // selected (see below). Any None objects will be dropped silently unless 2094 // they are all None in which case a ValueError will be raised. 2095 // 2096 // axis : {0/'index', 1/'columns'}, default 0 2097 // 2098 // The axis to concatenate along. 2099 // 2100 // join : {'inner', 'outer'}, default 'outer' 2101 // 2102 // How to handle indexes on other axis (or axes). 2103 // 2104 // ignore_index : bool, default False 2105 // 2106 // If True, do not use the index values along the concatenation axis. 
The 2107 // resulting axis will be labeled 0, ..., n - 1. This is useful if you are 2108 // concatenating objects where the concatenation axis does not have 2109 // meaningful indexing information. Note the index values on the other 2110 // axes are still respected in the join. 2111 // 2112 // keys : sequence, default None 2113 // 2114 // If multiple levels passed, should contain tuples. Construct 2115 // hierarchical index using the passed keys as the outermost level. 2116 // 2117 // levels : list of sequences, default None 2118 // 2119 // Specific levels (unique values) to use for constructing a 2120 // MultiIndex. Otherwise they will be inferred from the keys. 2121 // 2122 // names : list, default None 2123 // 2124 // Names for the levels in the resulting hierarchical index. 2125 // 2126 // verify_integrity : bool, default False 2127 // 2128 // Check whether the new concatenated axis contains duplicates. This can 2129 // be very expensive relative to the actual data concatenation. 2130 // 2131 // sort : bool, default False 2132 // 2133 // Sort non-concatenation axis if it is not already aligned. One exception to 2134 // this is when the non-concatentation axis is a DatetimeIndex and join='outer' 2135 // and the axis is not already aligned. In that case, the non-concatenation 2136 // axis is always sorted lexicographically. 2137 // 2138 // copy : bool, default True 2139 // 2140 // If False, do not copy data unnecessarily. 2141 // 2142 // Returns 2143 // ------- 2144 // object, type of objs 2145 // 2146 // When concatenating all ``Series`` along the index (axis=0), a 2147 // ``Series`` is returned. When ``objs`` contains at least one 2148 // ``DataFrame``, a ``DataFrame`` is returned. When concatenating along 2149 // the columns (axis=1), a ``DataFrame`` is returned. 2150 // 2151 // See Also 2152 // -------- 2153 // DataFrame.join : Join DataFrames using indexes. 2154 // DataFrame.merge : Merge DataFrames by indexes or columns. 2155 // 2156 // Notes 2157 // ----- 2158 // The keys, levels, and names arguments are all optional. 2159 // 2160 // A walkthrough of how this method fits in with other tools for combining 2161 // pandas objects can be found `here 2162 // <https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html>`__. 2163 // 2164 // It is not recommended to build DataFrames by adding single rows in a 2165 // for loop. Build a list of rows and make a DataFrame in a single concat. 2166 // 2167 // Examples 2168 // -------- 2169 // Combine two “Series“. 2170 // 2171 // >>> s1 = pd.Series(['a', 'b']) 2172 // >>> s2 = pd.Series(['c', 'd']) 2173 // >>> pd.concat([s1, s2]) 2174 // 0 a 2175 // 1 b 2176 // 0 c 2177 // 1 d 2178 // dtype: object 2179 // 2180 // Clear the existing index and reset it in the result 2181 // by setting the “ignore_index“ option to “True“. 2182 // 2183 // >>> pd.concat([s1, s2], ignore_index=True) 2184 // 0 a 2185 // 1 b 2186 // 2 c 2187 // 3 d 2188 // dtype: object 2189 // 2190 // Add a hierarchical index at the outermost level of 2191 // the data with the “keys“ option. 2192 // 2193 // >>> pd.concat([s1, s2], keys=['s1', 's2']) 2194 // s1 0 a 2195 // 2196 // 1 b 2197 // 2198 // s2 0 c 2199 // 2200 // 1 d 2201 // 2202 // dtype: object 2203 // 2204 // Label the index keys you create with the “names“ option. 2205 // 2206 // >>> pd.concat([s1, s2], keys=['s1', 's2'], 2207 // ... 
names=['Series name', 'Row ID']) 2208 // Series name Row ID 2209 // s1 0 a 2210 // 2211 // 1 b 2212 // 2213 // s2 0 c 2214 // 2215 // 1 d 2216 // 2217 // dtype: object 2218 // 2219 // Combine two “DataFrame“ objects with identical columns. 2220 // 2221 // >>> df1 = pd.DataFrame([['a', 1], ['b', 2]], 2222 // ... columns=['letter', 'number']) 2223 // >>> df1 2224 // 2225 // letter number 2226 // 2227 // 0 a 1 2228 // 1 b 2 2229 // >>> df2 = pd.DataFrame([['c', 3], ['d', 4]], 2230 // ... columns=['letter', 'number']) 2231 // >>> df2 2232 // 2233 // letter number 2234 // 2235 // 0 c 3 2236 // 1 d 4 2237 // >>> pd.concat([df1, df2]) 2238 // 2239 // letter number 2240 // 2241 // 0 a 1 2242 // 1 b 2 2243 // 0 c 3 2244 // 1 d 4 2245 // 2246 // Combine “DataFrame“ objects with overlapping columns 2247 // and return everything. Columns outside the intersection will 2248 // be filled with “NaN“ values. 2249 // 2250 // >>> df3 = pd.DataFrame([['c', 3, 'cat'], ['d', 4, 'dog']], 2251 // ... columns=['letter', 'number', 'animal']) 2252 // >>> df3 2253 // 2254 // letter number animal 2255 // 2256 // 0 c 3 cat 2257 // 1 d 4 dog 2258 // >>> pd.concat([df1, df3], sort=False) 2259 // 2260 // letter number animal 2261 // 2262 // 0 a 1 NaN 2263 // 1 b 2 NaN 2264 // 0 c 3 cat 2265 // 1 d 4 dog 2266 // 2267 // Combine “DataFrame“ objects with overlapping columns 2268 // and return only those that are shared by passing “inner“ to 2269 // the “join“ keyword argument. 2270 // 2271 // >>> pd.concat([df1, df3], join="inner") 2272 // 2273 // letter number 2274 // 2275 // 0 a 1 2276 // 1 b 2 2277 // 0 c 3 2278 // 1 d 4 2279 // 2280 // Combine “DataFrame“ objects horizontally along the x axis by 2281 // passing in “axis=1“. 2282 // 2283 // >>> df4 = pd.DataFrame([['bird', 'polly'], ['monkey', 'george']], 2284 // ... columns=['animal', 'name']) 2285 // >>> pd.concat([df1, df4], axis=1) 2286 // 2287 // letter number animal name 2288 // 2289 // 0 a 1 bird polly 2290 // 1 b 2 monkey george 2291 // 2292 // Prevent the result from including duplicate index values with the 2293 // “verify_integrity“ option. 2294 // 2295 // >>> df5 = pd.DataFrame([1], index=['a']) 2296 // >>> df5 2297 // 2298 // 0 2299 // 2300 // a 1 2301 // >>> df6 = pd.DataFrame([2], index=['a']) 2302 // >>> df6 2303 // 2304 // 0 2305 // 2306 // a 2 2307 // >>> pd.concat([df5, df6], verify_integrity=True) 2308 // Traceback (most recent call last): 2309 // 2310 // ... 2311 // 2312 // ValueError: Indexes have overlapping values: ['a'] 2313 // 2314 // Append a single row to the end of a “DataFrame“ object. 2315 // 2316 // >>> df7 = pd.DataFrame({'a': 1, 'b': 2}, index=[0]) 2317 // >>> df7 2318 // 2319 // a b 2320 // 2321 // 0 1 2 2322 // >>> new_row = pd.Series({'a': 3, 'b': 4}) 2323 // >>> new_row 2324 // a 3 2325 // b 4 2326 // dtype: int64 2327 // >>> pd.concat([df7, new_row.to_frame().T], ignore_index=True) 2328 // 2329 // a b 2330 // 2331 // 0 1 2 2332 // 1 3 4 2333 // 2334 //go:linkname Concat py.concat 2335 func Concat(objs *py.Object) *py.Object 2336 2337 // Reshape wide-format data to long. Generalized inverse of DataFrame.pivot. 2338 // 2339 // Accepts a dictionary, “groups“, in which each key is a new column name 2340 // and each value is a list of old column names that will be "melted" under 2341 // the new column name as part of the reshape. 2342 // 2343 // Parameters 2344 // ---------- 2345 // data : DataFrame 2346 // 2347 // The wide-format DataFrame. 2348 // 2349 // groups : dict 2350 // 2351 // {new_name : list_of_columns}. 
2352 // 2353 // dropna : bool, default True 2354 // 2355 // Do not include columns whose entries are all NaN. 2356 // 2357 // Returns 2358 // ------- 2359 // DataFrame 2360 // 2361 // Reshaped DataFrame. 2362 // 2363 // See Also 2364 // -------- 2365 // melt : Unpivot a DataFrame from wide to long format, optionally leaving 2366 // 2367 // identifiers set. 2368 // 2369 // pivot : Create a spreadsheet-style pivot table as a DataFrame. 2370 // DataFrame.pivot : Pivot without aggregation that can handle 2371 // 2372 // non-numeric data. 2373 // 2374 // DataFrame.pivot_table : Generalization of pivot that can handle 2375 // 2376 // duplicate values for one index/column pair. 2377 // 2378 // DataFrame.unstack : Pivot based on the index values instead of a 2379 // 2380 // column. 2381 // 2382 // wide_to_long : Wide panel to long format. Less flexible but more 2383 // 2384 // user-friendly than melt. 2385 // 2386 // Examples 2387 // -------- 2388 // >>> data = pd.DataFrame({'hr1': [514, 573], 'hr2': [545, 526], 2389 // ... 'team': ['Red Sox', 'Yankees'], 2390 // ... 'year1': [2007, 2007], 'year2': [2008, 2008]}) 2391 // >>> data 2392 // 2393 // hr1 hr2 team year1 year2 2394 // 2395 // 0 514 545 Red Sox 2007 2008 2396 // 1 573 526 Yankees 2007 2008 2397 // 2398 // >>> pd.lreshape(data, {'year': ['year1', 'year2'], 'hr': ['hr1', 'hr2']}) 2399 // 2400 // team year hr 2401 // 2402 // 0 Red Sox 2007 514 2403 // 1 Yankees 2007 573 2404 // 2 Red Sox 2008 545 2405 // 3 Yankees 2008 526 2406 // 2407 //go:linkname Lreshape py.lreshape 2408 func Lreshape(data *py.Object, groups *py.Object, dropna *py.Object) *py.Object 2409 2410 // Unpivot a DataFrame from wide to long format, optionally leaving identifiers set. 2411 // 2412 // This function is useful to massage a DataFrame into a format where one 2413 // or more columns are identifier variables (`id_vars`), while all other 2414 // columns, considered measured variables (`value_vars`), are "unpivoted" to 2415 // the row axis, leaving just two non-identifier columns, 'variable' and 2416 // 'value'. 2417 // 2418 // Parameters 2419 // ---------- 2420 // id_vars : scalar, tuple, list, or ndarray, optional 2421 // 2422 // Column(s) to use as identifier variables. 2423 // 2424 // value_vars : scalar, tuple, list, or ndarray, optional 2425 // 2426 // Column(s) to unpivot. If not specified, uses all columns that 2427 // are not set as `id_vars`. 2428 // 2429 // var_name : scalar, default None 2430 // 2431 // Name to use for the 'variable' column. If None it uses 2432 // ``frame.columns.name`` or 'variable'. 2433 // 2434 // value_name : scalar, default 'value' 2435 // 2436 // Name to use for the 'value' column, can't be an existing column label. 2437 // 2438 // col_level : scalar, optional 2439 // 2440 // If columns are a MultiIndex then use this level to melt. 2441 // 2442 // ignore_index : bool, default True 2443 // 2444 // If True, original index is ignored. If False, the original index is retained. 2445 // Index labels will be repeated as necessary. 2446 // 2447 // Returns 2448 // ------- 2449 // DataFrame 2450 // 2451 // Unpivoted DataFrame. 2452 // 2453 // See Also 2454 // -------- 2455 // DataFrame.melt : Identical method. 2456 // pivot_table : Create a spreadsheet-style pivot table as a DataFrame. 2457 // DataFrame.pivot : Return reshaped DataFrame organized 2458 // 2459 // by given index / column values. 2460 // 2461 // DataFrame.explode : Explode a DataFrame from list-like 2462 // 2463 // columns to long format. 
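//
// A minimal Go-side sketch of this binding, under the same assumptions as the
// earlier sketches (`frame` and `idVars` are *py.Object values built
// elsewhere; nil is assumed to stand for each remaining keyword default).
// The Python examples further below show the underlying reshape.
//
//     // melted wraps the unpivoted DataFrame returned by pd.melt.
//     melted := pandas.Melt(frame, idVars, nil, nil, nil, nil, nil)
//     _ = melted
//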
2464 // 2465 // Notes 2466 // ----- 2467 // Reference :ref:`the user guide <reshaping.melt>` for more examples. 2468 // 2469 // Examples 2470 // -------- 2471 // >>> df = pd.DataFrame({'A': {0: 'a', 1: 'b', 2: 'c'}, 2472 // ... 'B': {0: 1, 1: 3, 2: 5}, 2473 // ... 'C': {0: 2, 1: 4, 2: 6}}) 2474 // >>> df 2475 // 2476 // A B C 2477 // 2478 // 0 a 1 2 2479 // 1 b 3 4 2480 // 2 c 5 6 2481 // 2482 // >>> pd.melt(df, id_vars=['A'], value_vars=['B']) 2483 // 2484 // A variable value 2485 // 2486 // 0 a B 1 2487 // 1 b B 3 2488 // 2 c B 5 2489 // 2490 // >>> pd.melt(df, id_vars=['A'], value_vars=['B', 'C']) 2491 // 2492 // A variable value 2493 // 2494 // 0 a B 1 2495 // 1 b B 3 2496 // 2 c B 5 2497 // 3 a C 2 2498 // 4 b C 4 2499 // 5 c C 6 2500 // 2501 // The names of 'variable' and 'value' columns can be customized: 2502 // 2503 // >>> pd.melt(df, id_vars=['A'], value_vars=['B'], 2504 // ... var_name='myVarname', value_name='myValname') 2505 // 2506 // A myVarname myValname 2507 // 2508 // 0 a B 1 2509 // 1 b B 3 2510 // 2 c B 5 2511 // 2512 // Original index values can be kept around: 2513 // 2514 // >>> pd.melt(df, id_vars=['A'], value_vars=['B', 'C'], ignore_index=False) 2515 // 2516 // A variable value 2517 // 2518 // 0 a B 1 2519 // 1 b B 3 2520 // 2 c B 5 2521 // 0 a C 2 2522 // 1 b C 4 2523 // 2 c C 6 2524 // 2525 // If you have multi-index columns: 2526 // 2527 // >>> df.columns = [list('ABC'), list('DEF')] 2528 // >>> df 2529 // 2530 // A B C 2531 // D E F 2532 // 2533 // 0 a 1 2 2534 // 1 b 3 4 2535 // 2 c 5 6 2536 // 2537 // >>> pd.melt(df, col_level=0, id_vars=['A'], value_vars=['B']) 2538 // 2539 // A variable value 2540 // 2541 // 0 a B 1 2542 // 1 b B 3 2543 // 2 c B 5 2544 // 2545 // >>> pd.melt(df, id_vars=[('A', 'D')], value_vars=[('B', 'E')]) 2546 // 2547 // (A, D) variable_0 variable_1 value 2548 // 2549 // 0 a B E 1 2550 // 1 b B E 3 2551 // 2 c B E 5 2552 // 2553 //go:linkname Melt py.melt 2554 func Melt(frame *py.Object, idVars *py.Object, valueVars *py.Object, varName *py.Object, valueName *py.Object, colLevel *py.Object, ignoreIndex *py.Object) *py.Object 2555 2556 // Unpivot a DataFrame from wide to long format. 2557 // 2558 // Less flexible but more user-friendly than melt. 2559 // 2560 // With stubnames ['A', 'B'], this function expects to find one or more 2561 // group of columns with format 2562 // A-suffix1, A-suffix2,..., B-suffix1, B-suffix2,... 2563 // You specify what you want to call this suffix in the resulting long format 2564 // with `j` (for example `j='year'`) 2565 // 2566 // Each row of these wide variables are assumed to be uniquely identified by 2567 // `i` (can be a single column name or a list of column names) 2568 // 2569 // All remaining variables in the data frame are left intact. 2570 // 2571 // Parameters 2572 // ---------- 2573 // df : DataFrame 2574 // 2575 // The wide-format DataFrame. 2576 // 2577 // stubnames : str or list-like 2578 // 2579 // The stub name(s). The wide format variables are assumed to 2580 // start with the stub names. 2581 // 2582 // i : str or list-like 2583 // 2584 // Column(s) to use as id variable(s). 2585 // 2586 // j : str 2587 // 2588 // The name of the sub-observation variable. What you wish to name your 2589 // suffix in the long format. 2590 // 2591 // sep : str, default "" 2592 // 2593 // A character indicating the separation of the variable names 2594 // in the wide format, to be stripped from the names in the long format. 
2595 // For example, if your column names are A-suffix1, A-suffix2, you 2596 // can strip the hyphen by specifying `sep='-'`. 2597 // 2598 // suffix : str, default '\\d+' 2599 // 2600 // A regular expression capturing the wanted suffixes. '\\d+' captures 2601 // numeric suffixes. Suffixes with no numbers could be specified with the 2602 // negated character class '\\D+'. You can also further disambiguate 2603 // suffixes, for example, if your wide variables are of the form A-one, 2604 // B-two,.., and you have an unrelated column A-rating, you can ignore the 2605 // last one by specifying `suffix='(!?one|two)'`. When all suffixes are 2606 // numeric, they are cast to int64/float64. 2607 // 2608 // Returns 2609 // ------- 2610 // DataFrame 2611 // 2612 // A DataFrame that contains each stub name as a variable, with new index 2613 // (i, j). 2614 // 2615 // See Also 2616 // -------- 2617 // melt : Unpivot a DataFrame from wide to long format, optionally leaving 2618 // 2619 // identifiers set. 2620 // 2621 // pivot : Create a spreadsheet-style pivot table as a DataFrame. 2622 // DataFrame.pivot : Pivot without aggregation that can handle 2623 // 2624 // non-numeric data. 2625 // 2626 // DataFrame.pivot_table : Generalization of pivot that can handle 2627 // 2628 // duplicate values for one index/column pair. 2629 // 2630 // DataFrame.unstack : Pivot based on the index values instead of a 2631 // 2632 // column. 2633 // 2634 // Notes 2635 // ----- 2636 // All extra variables are left untouched. This simply uses 2637 // `pandas.melt` under the hood, but is hard-coded to "do the right thing" 2638 // in a typical case. 2639 // 2640 // Examples 2641 // -------- 2642 // >>> np.random.seed(123) 2643 // >>> df = pd.DataFrame({"A1970" : {0 : "a", 1 : "b", 2 : "c"}, 2644 // ... "A1980" : {0 : "d", 1 : "e", 2 : "f"}, 2645 // ... "B1970" : {0 : 2.5, 1 : 1.2, 2 : .7}, 2646 // ... "B1980" : {0 : 3.2, 1 : 1.3, 2 : .1}, 2647 // ... "X" : dict(zip(range(3), np.random.randn(3))) 2648 // ... }) 2649 // >>> df["id"] = df.index 2650 // >>> df 2651 // 2652 // A1970 A1980 B1970 B1980 X id 2653 // 2654 // 0 a d 2.5 3.2 -1.085631 0 2655 // 1 b e 1.2 1.3 0.997345 1 2656 // 2 c f 0.7 0.1 0.282978 2 2657 // >>> pd.wide_to_long(df, ["A", "B"], i="id", j="year") 2658 // ... # doctest: +NORMALIZE_WHITESPACE 2659 // 2660 // X A B 2661 // 2662 // id year 2663 // 0 1970 -1.085631 a 2.5 2664 // 1 1970 0.997345 b 1.2 2665 // 2 1970 0.282978 c 0.7 2666 // 0 1980 -1.085631 d 3.2 2667 // 1 1980 0.997345 e 1.3 2668 // 2 1980 0.282978 f 0.1 2669 // 2670 // # With multiple id columns 2671 // 2672 // >>> df = pd.DataFrame({ 2673 // ... 'famid': [1, 1, 1, 2, 2, 2, 3, 3, 3], 2674 // ... 'birth': [1, 2, 3, 1, 2, 3, 1, 2, 3], 2675 // ... 'ht1': [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1], 2676 // ... 'ht2': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9] 2677 // ... }) 2678 // >>> df 2679 // 2680 // famid birth ht1 ht2 2681 // 2682 // 0 1 1 2.8 3.4 2683 // 1 1 2 2.9 3.8 2684 // 2 1 3 2.2 2.9 2685 // 3 2 1 2.0 3.2 2686 // 4 2 2 1.8 2.8 2687 // 5 2 3 1.9 2.4 2688 // 6 3 1 2.2 3.3 2689 // 7 3 2 2.3 3.4 2690 // 8 3 3 2.1 2.9 2691 // >>> l = pd.wide_to_long(df, stubnames='ht', i=['famid', 'birth'], j='age') 2692 // >>> l 2693 // ... 
# doctest: +NORMALIZE_WHITESPACE 2694 // 2695 // ht 2696 // 2697 // famid birth age 2698 // 1 1 1 2.8 2699 // 2700 // 2 3.4 2701 // 2 1 2.9 2702 // 2 3.8 2703 // 3 1 2.2 2704 // 2 2.9 2705 // 2706 // 2 1 1 2.0 2707 // 2708 // 2 3.2 2709 // 2 1 1.8 2710 // 2 2.8 2711 // 3 1 1.9 2712 // 2 2.4 2713 // 2714 // 3 1 1 2.2 2715 // 2716 // 2 3.3 2717 // 2 1 2.3 2718 // 2 3.4 2719 // 3 1 2.1 2720 // 2 2.9 2721 // 2722 // Going from long back to wide just takes some creative use of `unstack` 2723 // 2724 // >>> w = l.unstack() 2725 // >>> w.columns = w.columns.map('{0[0]}{0[1]}'.format) 2726 // >>> w.reset_index() 2727 // 2728 // famid birth ht1 ht2 2729 // 2730 // 0 1 1 2.8 3.4 2731 // 1 1 2 2.9 3.8 2732 // 2 1 3 2.2 2.9 2733 // 3 2 1 2.0 3.2 2734 // 4 2 2 1.8 2.8 2735 // 5 2 3 1.9 2.4 2736 // 6 3 1 2.2 3.3 2737 // 7 3 2 2.3 3.4 2738 // 8 3 3 2.1 2.9 2739 // 2740 // # Less wieldy column names are also handled 2741 // 2742 // >>> np.random.seed(0) 2743 // >>> df = pd.DataFrame({'A(weekly)-2010': np.random.rand(3), 2744 // ... 'A(weekly)-2011': np.random.rand(3), 2745 // ... 'B(weekly)-2010': np.random.rand(3), 2746 // ... 'B(weekly)-2011': np.random.rand(3), 2747 // ... 'X' : np.random.randint(3, size=3)}) 2748 // >>> df['id'] = df.index 2749 // >>> df # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS 2750 // 2751 // A(weekly)-2010 A(weekly)-2011 B(weekly)-2010 B(weekly)-2011 X id 2752 // 2753 // 0 0.548814 0.544883 0.437587 0.383442 0 0 2754 // 1 0.715189 0.423655 0.891773 0.791725 1 1 2755 // 2 0.602763 0.645894 0.963663 0.528895 1 2 2756 // 2757 // >>> pd.wide_to_long(df, ['A(weekly)', 'B(weekly)'], i='id', 2758 // ... j='year', sep='-') 2759 // ... # doctest: +NORMALIZE_WHITESPACE 2760 // 2761 // X A(weekly) B(weekly) 2762 // 2763 // id year 2764 // 0 2010 0 0.548814 0.437587 2765 // 1 2010 1 0.715189 0.891773 2766 // 2 2010 1 0.602763 0.963663 2767 // 0 2011 0 0.544883 0.383442 2768 // 1 2011 1 0.423655 0.791725 2769 // 2 2011 1 0.645894 0.528895 2770 // 2771 // If we have many columns, we could also use a regex to find our 2772 // stubnames and pass that list on to wide_to_long 2773 // 2774 // >>> stubnames = sorted( 2775 // ... set([match[0] for match in df.columns.str.findall( 2776 // ... r'[A-B]\(.*\)').values if match != []]) 2777 // ... ) 2778 // >>> list(stubnames) 2779 // ['A(weekly)', 'B(weekly)'] 2780 // 2781 // All of the above examples have integers as suffixes. It is possible to 2782 // have non-integers as suffixes. 2783 // 2784 // >>> df = pd.DataFrame({ 2785 // ... 'famid': [1, 1, 1, 2, 2, 2, 3, 3, 3], 2786 // ... 'birth': [1, 2, 3, 1, 2, 3, 1, 2, 3], 2787 // ... 'ht_one': [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1], 2788 // ... 'ht_two': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9] 2789 // ... }) 2790 // >>> df 2791 // 2792 // famid birth ht_one ht_two 2793 // 2794 // 0 1 1 2.8 3.4 2795 // 1 1 2 2.9 3.8 2796 // 2 1 3 2.2 2.9 2797 // 3 2 1 2.0 3.2 2798 // 4 2 2 1.8 2.8 2799 // 5 2 3 1.9 2.4 2800 // 6 3 1 2.2 3.3 2801 // 7 3 2 2.3 3.4 2802 // 8 3 3 2.1 2.9 2803 // 2804 // >>> l = pd.wide_to_long(df, stubnames='ht', i=['famid', 'birth'], j='age', 2805 // ... sep='_', suffix=r'\w+') 2806 // >>> l 2807 // ... 
# doctest: +NORMALIZE_WHITESPACE 2808 // 2809 // ht 2810 // 2811 // famid birth age 2812 // 1 1 one 2.8 2813 // 2814 // two 3.4 2815 // 2 one 2.9 2816 // two 3.8 2817 // 3 one 2.2 2818 // two 2.9 2819 // 2820 // 2 1 one 2.0 2821 // 2822 // two 3.2 2823 // 2 one 1.8 2824 // two 2.8 2825 // 3 one 1.9 2826 // two 2.4 2827 // 2828 // 3 1 one 2.2 2829 // 2830 // two 3.3 2831 // 2 one 2.3 2832 // two 3.4 2833 // 3 one 2.1 2834 // two 2.9 2835 // 2836 //go:linkname WideToLong py.wide_to_long 2837 func WideToLong(df *py.Object, stubnames *py.Object, i *py.Object, j *py.Object, sep *py.Object, suffix *py.Object) *py.Object 2838 2839 // Merge DataFrame or named Series objects with a database-style join. 2840 // 2841 // A named Series object is treated as a DataFrame with a single named column. 2842 // 2843 // The join is done on columns or indexes. If joining columns on 2844 // columns, the DataFrame indexes *will be ignored*. Otherwise if joining indexes 2845 // on indexes or indexes on a column or columns, the index will be passed on. 2846 // When performing a cross merge, no column specifications to merge on are 2847 // allowed. 2848 // 2849 // .. warning:: 2850 // 2851 // If both key columns contain rows where the key is a null value, those 2852 // rows will be matched against each other. This is different from usual SQL 2853 // join behaviour and can lead to unexpected results. 2854 // 2855 // Parameters 2856 // ---------- 2857 // left : DataFrame or named Series 2858 // right : DataFrame or named Series 2859 // 2860 // Object to merge with. 2861 // 2862 // how : {'left', 'right', 'outer', 'inner', 'cross'}, default 'inner' 2863 // 2864 // Type of merge to be performed. 2865 // 2866 // * left: use only keys from left frame, similar to a SQL left outer join; 2867 // preserve key order. 2868 // * right: use only keys from right frame, similar to a SQL right outer join; 2869 // preserve key order. 2870 // * outer: use union of keys from both frames, similar to a SQL full outer 2871 // join; sort keys lexicographically. 2872 // * inner: use intersection of keys from both frames, similar to a SQL inner 2873 // join; preserve the order of the left keys. 2874 // * cross: creates the cartesian product from both frames, preserves the order 2875 // of the left keys. 2876 // 2877 // on : label or list 2878 // 2879 // Column or index level names to join on. These must be found in both 2880 // DataFrames. If `on` is None and not merging on indexes then this defaults 2881 // to the intersection of the columns in both DataFrames. 2882 // 2883 // left_on : label or list, or array-like 2884 // 2885 // Column or index level names to join on in the left DataFrame. Can also 2886 // be an array or list of arrays of the length of the left DataFrame. 2887 // These arrays are treated as if they are columns. 2888 // 2889 // right_on : label or list, or array-like 2890 // 2891 // Column or index level names to join on in the right DataFrame. Can also 2892 // be an array or list of arrays of the length of the right DataFrame. 2893 // These arrays are treated as if they are columns. 2894 // 2895 // left_index : bool, default False 2896 // 2897 // Use the index from the left DataFrame as the join key(s). If it is a 2898 // MultiIndex, the number of keys in the other DataFrame (either the index 2899 // or a number of columns) must match the number of levels. 2900 // 2901 // right_index : bool, default False 2902 // 2903 // Use the index from the right DataFrame as the join key. Same caveats as 2904 // left_index. 
2905 // 2906 // sort : bool, default False 2907 // 2908 // Sort the join keys lexicographically in the result DataFrame. If False, 2909 // the order of the join keys depends on the join type (how keyword). 2910 // 2911 // suffixes : list-like, default is ("_x", "_y") 2912 // 2913 // A length-2 sequence where each element is optionally a string 2914 // indicating the suffix to add to overlapping column names in 2915 // `left` and `right` respectively. Pass a value of `None` instead 2916 // of a string to indicate that the column name from `left` or 2917 // `right` should be left as-is, with no suffix. At least one of the 2918 // values must not be None. 2919 // 2920 // copy : bool, default True 2921 // 2922 // If False, avoid copy if possible. 2923 // 2924 // .. note:: 2925 // The `copy` keyword will change behavior in pandas 3.0. 2926 // `Copy-on-Write 2927 // <https://pandas.pydata.org/docs/dev/user_guide/copy_on_write.html>`__ 2928 // will be enabled by default, which means that all methods with a 2929 // `copy` keyword will use a lazy copy mechanism to defer the copy and 2930 // ignore the `copy` keyword. The `copy` keyword will be removed in a 2931 // future version of pandas. 2932 // 2933 // You can already get the future behavior and improvements through 2934 // enabling copy on write ``pd.options.mode.copy_on_write = True`` 2935 // 2936 // indicator : bool or str, default False 2937 // 2938 // If True, adds a column to the output DataFrame called "_merge" with 2939 // information on the source of each row. The column can be given a different 2940 // name by providing a string argument. The column will have a Categorical 2941 // type with the value of "left_only" for observations whose merge key only 2942 // appears in the left DataFrame, "right_only" for observations 2943 // whose merge key only appears in the right DataFrame, and "both" 2944 // if the observation's merge key is found in both DataFrames. 2945 // 2946 // validate : str, optional 2947 // 2948 // If specified, checks if merge is of specified type. 2949 // 2950 // * "one_to_one" or "1:1": check if merge keys are unique in both 2951 // left and right datasets. 2952 // * "one_to_many" or "1:m": check if merge keys are unique in left 2953 // dataset. 2954 // * "many_to_one" or "m:1": check if merge keys are unique in right 2955 // dataset. 2956 // * "many_to_many" or "m:m": allowed, but does not result in checks. 2957 // 2958 // Returns 2959 // ------- 2960 // DataFrame 2961 // 2962 // A DataFrame of the two merged objects. 2963 // 2964 // See Also 2965 // -------- 2966 // merge_ordered : Merge with optional filling/interpolation. 2967 // merge_asof : Merge on nearest keys. 2968 // DataFrame.join : Similar method using indices. 2969 // 2970 // Examples 2971 // -------- 2972 // >>> df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], 2973 // ... 'value': [1, 2, 3, 5]}) 2974 // >>> df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], 2975 // ... 'value': [5, 6, 7, 8]}) 2976 // >>> df1 2977 // 2978 // lkey value 2979 // 2980 // 0 foo 1 2981 // 1 bar 2 2982 // 2 baz 3 2983 // 3 foo 5 2984 // >>> df2 2985 // 2986 // rkey value 2987 // 2988 // 0 foo 5 2989 // 1 bar 6 2990 // 2 baz 7 2991 // 3 foo 8 2992 // 2993 // Merge df1 and df2 on the lkey and rkey columns. The value columns have 2994 // the default suffixes, _x and _y, appended. 
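//
// Before the doctest continues, a minimal Go-side sketch of this binding
// (same assumptions as the earlier sketches; `left`, `right` and `on` are
// *py.Object values built elsewhere, and nil is assumed to leave the other
// keyword arguments at their defaults):
//
//     // merged wraps the DataFrame produced by pd.merge on the given key.
//     merged := pandas.Merge(left, right, nil, on,
//         nil, nil, nil, nil, nil, nil, nil, nil, nil)
//     _ = merged
//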
2995 // 2996 // >>> df1.merge(df2, left_on='lkey', right_on='rkey') 2997 // 2998 // lkey value_x rkey value_y 2999 // 3000 // 0 foo 1 foo 5 3001 // 1 foo 1 foo 8 3002 // 2 bar 2 bar 6 3003 // 3 baz 3 baz 7 3004 // 4 foo 5 foo 5 3005 // 5 foo 5 foo 8 3006 // 3007 // Merge DataFrames df1 and df2 with specified left and right suffixes 3008 // appended to any overlapping columns. 3009 // 3010 // >>> df1.merge(df2, left_on='lkey', right_on='rkey', 3011 // ... suffixes=('_left', '_right')) 3012 // 3013 // lkey value_left rkey value_right 3014 // 3015 // 0 foo 1 foo 5 3016 // 1 foo 1 foo 8 3017 // 2 bar 2 bar 6 3018 // 3 baz 3 baz 7 3019 // 4 foo 5 foo 5 3020 // 5 foo 5 foo 8 3021 // 3022 // Merge DataFrames df1 and df2, but raise an exception if the DataFrames have 3023 // any overlapping columns. 3024 // 3025 // >>> df1.merge(df2, left_on='lkey', right_on='rkey', suffixes=(False, False)) 3026 // Traceback (most recent call last): 3027 // ... 3028 // ValueError: columns overlap but no suffix specified: 3029 // 3030 // Index(['value'], dtype='object') 3031 // 3032 // >>> df1 = pd.DataFrame({'a': ['foo', 'bar'], 'b': [1, 2]}) 3033 // >>> df2 = pd.DataFrame({'a': ['foo', 'baz'], 'c': [3, 4]}) 3034 // >>> df1 3035 // 3036 // a b 3037 // 3038 // 0 foo 1 3039 // 1 bar 2 3040 // >>> df2 3041 // 3042 // a c 3043 // 3044 // 0 foo 3 3045 // 1 baz 4 3046 // 3047 // >>> df1.merge(df2, how='inner', on='a') 3048 // 3049 // a b c 3050 // 3051 // 0 foo 1 3 3052 // 3053 // >>> df1.merge(df2, how='left', on='a') 3054 // 3055 // a b c 3056 // 3057 // 0 foo 1 3.0 3058 // 1 bar 2 NaN 3059 // 3060 // >>> df1 = pd.DataFrame({'left': ['foo', 'bar']}) 3061 // >>> df2 = pd.DataFrame({'right': [7, 8]}) 3062 // >>> df1 3063 // 3064 // left 3065 // 3066 // 0 foo 3067 // 1 bar 3068 // >>> df2 3069 // 3070 // right 3071 // 3072 // 0 7 3073 // 1 8 3074 // 3075 // >>> df1.merge(df2, how='cross') 3076 // 3077 // left right 3078 // 3079 // 0 foo 7 3080 // 1 foo 8 3081 // 2 bar 7 3082 // 3 bar 8 3083 // 3084 //go:linkname Merge py.merge 3085 func Merge(left *py.Object, right *py.Object, how *py.Object, on *py.Object, leftOn *py.Object, rightOn *py.Object, leftIndex *py.Object, rightIndex *py.Object, sort *py.Object, suffixes *py.Object, copy *py.Object, indicator *py.Object, validate *py.Object) *py.Object 3086 3087 // Perform a merge by key distance. 3088 // 3089 // This is similar to a left-join except that we match on nearest 3090 // key rather than equal keys. Both DataFrames must be sorted by the key. 3091 // 3092 // For each row in the left DataFrame: 3093 // 3094 // - A "backward" search selects the last row in the right DataFrame whose 3095 // 'on' key is less than or equal to the left's key. 3096 // 3097 // - A "forward" search selects the first row in the right DataFrame whose 3098 // 'on' key is greater than or equal to the left's key. 3099 // 3100 // - A "nearest" search selects the row in the right DataFrame whose 'on' 3101 // key is closest in absolute distance to the left's key. 3102 // 3103 // Optionally match on equivalent keys with 'by' before searching with 'on'. 3104 // 3105 // Parameters 3106 // ---------- 3107 // left : DataFrame or named Series 3108 // right : DataFrame or named Series 3109 // on : label 3110 // 3111 // Field name to join on. Must be found in both DataFrames. 3112 // The data MUST be ordered. Furthermore this must be a numeric column, 3113 // such as datetimelike, integer, or float. On or left_on/right_on 3114 // must be given. 
3115 // 3116 // left_on : label 3117 // 3118 // Field name to join on in left DataFrame. 3119 // 3120 // right_on : label 3121 // 3122 // Field name to join on in right DataFrame. 3123 // 3124 // left_index : bool 3125 // 3126 // Use the index of the left DataFrame as the join key. 3127 // 3128 // right_index : bool 3129 // 3130 // Use the index of the right DataFrame as the join key. 3131 // 3132 // by : column name or list of column names 3133 // 3134 // Match on these columns before performing merge operation. 3135 // 3136 // left_by : column name 3137 // 3138 // Field names to match on in the left DataFrame. 3139 // 3140 // right_by : column name 3141 // 3142 // Field names to match on in the right DataFrame. 3143 // 3144 // suffixes : 2-length sequence (tuple, list, ...) 3145 // 3146 // Suffix to apply to overlapping column names in the left and right 3147 // side, respectively. 3148 // 3149 // tolerance : int or Timedelta, optional, default None 3150 // 3151 // Select asof tolerance within this range; must be compatible 3152 // with the merge index. 3153 // 3154 // allow_exact_matches : bool, default True 3155 // 3156 // - If True, allow matching with the same 'on' value 3157 // (i.e. less-than-or-equal-to / greater-than-or-equal-to) 3158 // - If False, don't match the same 'on' value 3159 // (i.e., strictly less-than / strictly greater-than). 3160 // 3161 // direction : 'backward' (default), 'forward', or 'nearest' 3162 // 3163 // Whether to search for prior, subsequent, or closest matches. 3164 // 3165 // Returns 3166 // ------- 3167 // DataFrame 3168 // 3169 // See Also 3170 // -------- 3171 // merge : Merge with a database-style join. 3172 // merge_ordered : Merge with optional filling/interpolation. 3173 // 3174 // Examples 3175 // -------- 3176 // >>> left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]}) 3177 // >>> left 3178 // 3179 // a left_val 3180 // 3181 // 0 1 a 3182 // 1 5 b 3183 // 2 10 c 3184 // 3185 // >>> right = pd.DataFrame({"a": [1, 2, 3, 6, 7], "right_val": [1, 2, 3, 6, 7]}) 3186 // >>> right 3187 // 3188 // a right_val 3189 // 3190 // 0 1 1 3191 // 1 2 2 3192 // 2 3 3 3193 // 3 6 6 3194 // 4 7 7 3195 // 3196 // >>> pd.merge_asof(left, right, on="a") 3197 // 3198 // a left_val right_val 3199 // 3200 // 0 1 a 1 3201 // 1 5 b 3 3202 // 2 10 c 7 3203 // 3204 // >>> pd.merge_asof(left, right, on="a", allow_exact_matches=False) 3205 // 3206 // a left_val right_val 3207 // 3208 // 0 1 a NaN 3209 // 1 5 b 3.0 3210 // 2 10 c 7.0 3211 // 3212 // >>> pd.merge_asof(left, right, on="a", direction="forward") 3213 // 3214 // a left_val right_val 3215 // 3216 // 0 1 a 1.0 3217 // 1 5 b 6.0 3218 // 2 10 c NaN 3219 // 3220 // >>> pd.merge_asof(left, right, on="a", direction="nearest") 3221 // 3222 // a left_val right_val 3223 // 3224 // 0 1 a 1 3225 // 1 5 b 6 3226 // 2 10 c 7 3227 // 3228 // We can use indexed DataFrames as well. 
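//
// A Go-side sketch of this binding, before the indexed example below (same
// assumptions as the earlier sketches; nil is assumed to keep every optional
// argument at its Python-side default):
//
//     // asof wraps the result of pd.merge_asof(left, right, on=...).
//     asof := pandas.MergeAsof(left, right, on,
//         nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil)
//     _ = asof
//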
3229 // 3230 // >>> left = pd.DataFrame({"left_val": ["a", "b", "c"]}, index=[1, 5, 10]) 3231 // >>> left 3232 // 3233 // left_val 3234 // 3235 // 1 a 3236 // 5 b 3237 // 10 c 3238 // 3239 // >>> right = pd.DataFrame({"right_val": [1, 2, 3, 6, 7]}, index=[1, 2, 3, 6, 7]) 3240 // >>> right 3241 // 3242 // right_val 3243 // 3244 // 1 1 3245 // 2 2 3246 // 3 3 3247 // 6 6 3248 // 7 7 3249 // 3250 // >>> pd.merge_asof(left, right, left_index=True, right_index=True) 3251 // 3252 // left_val right_val 3253 // 3254 // 1 a 1 3255 // 5 b 3 3256 // 10 c 7 3257 // 3258 // # Here is a real-world times-series example 3259 // 3260 // >>> quotes = pd.DataFrame( 3261 // ... { 3262 // ... "time": [ 3263 // ... pd.Timestamp("2016-05-25 13:30:00.023"), 3264 // ... pd.Timestamp("2016-05-25 13:30:00.023"), 3265 // ... pd.Timestamp("2016-05-25 13:30:00.030"), 3266 // ... pd.Timestamp("2016-05-25 13:30:00.041"), 3267 // ... pd.Timestamp("2016-05-25 13:30:00.048"), 3268 // ... pd.Timestamp("2016-05-25 13:30:00.049"), 3269 // ... pd.Timestamp("2016-05-25 13:30:00.072"), 3270 // ... pd.Timestamp("2016-05-25 13:30:00.075") 3271 // ... ], 3272 // ... "ticker": [ 3273 // ... "GOOG", 3274 // ... "MSFT", 3275 // ... "MSFT", 3276 // ... "MSFT", 3277 // ... "GOOG", 3278 // ... "AAPL", 3279 // ... "GOOG", 3280 // ... "MSFT" 3281 // ... ], 3282 // ... "bid": [720.50, 51.95, 51.97, 51.99, 720.50, 97.99, 720.50, 52.01], 3283 // ... "ask": [720.93, 51.96, 51.98, 52.00, 720.93, 98.01, 720.88, 52.03] 3284 // ... } 3285 // ... ) 3286 // >>> quotes 3287 // 3288 // time ticker bid ask 3289 // 3290 // 0 2016-05-25 13:30:00.023 GOOG 720.50 720.93 3291 // 1 2016-05-25 13:30:00.023 MSFT 51.95 51.96 3292 // 2 2016-05-25 13:30:00.030 MSFT 51.97 51.98 3293 // 3 2016-05-25 13:30:00.041 MSFT 51.99 52.00 3294 // 4 2016-05-25 13:30:00.048 GOOG 720.50 720.93 3295 // 5 2016-05-25 13:30:00.049 AAPL 97.99 98.01 3296 // 6 2016-05-25 13:30:00.072 GOOG 720.50 720.88 3297 // 7 2016-05-25 13:30:00.075 MSFT 52.01 52.03 3298 // 3299 // >>> trades = pd.DataFrame( 3300 // ... { 3301 // ... "time": [ 3302 // ... pd.Timestamp("2016-05-25 13:30:00.023"), 3303 // ... pd.Timestamp("2016-05-25 13:30:00.038"), 3304 // ... pd.Timestamp("2016-05-25 13:30:00.048"), 3305 // ... pd.Timestamp("2016-05-25 13:30:00.048"), 3306 // ... pd.Timestamp("2016-05-25 13:30:00.048") 3307 // ... ], 3308 // ... "ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"], 3309 // ... "price": [51.95, 51.95, 720.77, 720.92, 98.0], 3310 // ... "quantity": [75, 155, 100, 100, 100] 3311 // ... } 3312 // ... ) 3313 // >>> trades 3314 // 3315 // time ticker price quantity 3316 // 3317 // 0 2016-05-25 13:30:00.023 MSFT 51.95 75 3318 // 1 2016-05-25 13:30:00.038 MSFT 51.95 155 3319 // 2 2016-05-25 13:30:00.048 GOOG 720.77 100 3320 // 3 2016-05-25 13:30:00.048 GOOG 720.92 100 3321 // 4 2016-05-25 13:30:00.048 AAPL 98.00 100 3322 // 3323 // # By default we are taking the asof of the quotes 3324 // 3325 // >>> pd.merge_asof(trades, quotes, on="time", by="ticker") 3326 // 3327 // time ticker price quantity bid ask 3328 // 3329 // 0 2016-05-25 13:30:00.023 MSFT 51.95 75 51.95 51.96 3330 // 1 2016-05-25 13:30:00.038 MSFT 51.95 155 51.97 51.98 3331 // 2 2016-05-25 13:30:00.048 GOOG 720.77 100 720.50 720.93 3332 // 3 2016-05-25 13:30:00.048 GOOG 720.92 100 720.50 720.93 3333 // 4 2016-05-25 13:30:00.048 AAPL 98.00 100 NaN NaN 3334 // 3335 // # We only asof within 2ms between the quote time and the trade time 3336 // 3337 // >>> pd.merge_asof( 3338 // ... 
trades, quotes, on="time", by="ticker", tolerance=pd.Timedelta("2ms") 3339 // ... ) 3340 // 3341 // time ticker price quantity bid ask 3342 // 3343 // 0 2016-05-25 13:30:00.023 MSFT 51.95 75 51.95 51.96 3344 // 1 2016-05-25 13:30:00.038 MSFT 51.95 155 NaN NaN 3345 // 2 2016-05-25 13:30:00.048 GOOG 720.77 100 720.50 720.93 3346 // 3 2016-05-25 13:30:00.048 GOOG 720.92 100 720.50 720.93 3347 // 4 2016-05-25 13:30:00.048 AAPL 98.00 100 NaN NaN 3348 // 3349 // We only asof within 10ms between the quote time and the trade time 3350 // and we exclude exact matches on time. However *prior* data will 3351 // propagate forward 3352 // 3353 // >>> pd.merge_asof( 3354 // ... trades, 3355 // ... quotes, 3356 // ... on="time", 3357 // ... by="ticker", 3358 // ... tolerance=pd.Timedelta("10ms"), 3359 // ... allow_exact_matches=False 3360 // ... ) 3361 // 3362 // time ticker price quantity bid ask 3363 // 3364 // 0 2016-05-25 13:30:00.023 MSFT 51.95 75 NaN NaN 3365 // 1 2016-05-25 13:30:00.038 MSFT 51.95 155 51.97 51.98 3366 // 2 2016-05-25 13:30:00.048 GOOG 720.77 100 NaN NaN 3367 // 3 2016-05-25 13:30:00.048 GOOG 720.92 100 NaN NaN 3368 // 4 2016-05-25 13:30:00.048 AAPL 98.00 100 NaN NaN 3369 // 3370 //go:linkname MergeAsof py.merge_asof 3371 func MergeAsof(left *py.Object, right *py.Object, on *py.Object, leftOn *py.Object, rightOn *py.Object, leftIndex *py.Object, rightIndex *py.Object, by *py.Object, leftBy *py.Object, rightBy *py.Object, suffixes *py.Object, tolerance *py.Object, allowExactMatches *py.Object, direction *py.Object) *py.Object 3372 3373 // Perform a merge for ordered data with optional filling/interpolation. 3374 // 3375 // Designed for ordered data like time series data. Optionally 3376 // perform group-wise merge (see examples). 3377 // 3378 // Parameters 3379 // ---------- 3380 // left : DataFrame or named Series 3381 // right : DataFrame or named Series 3382 // on : label or list 3383 // 3384 // Field names to join on. Must be found in both DataFrames. 3385 // 3386 // left_on : label or list, or array-like 3387 // 3388 // Field names to join on in left DataFrame. Can be a vector or list of 3389 // vectors of the length of the DataFrame to use a particular vector as 3390 // the join key instead of columns. 3391 // 3392 // right_on : label or list, or array-like 3393 // 3394 // Field names to join on in right DataFrame or vector/list of vectors per 3395 // left_on docs. 3396 // 3397 // left_by : column name or list of column names 3398 // 3399 // Group left DataFrame by group columns and merge piece by piece with 3400 // right DataFrame. Must be None if either left or right are a Series. 3401 // 3402 // right_by : column name or list of column names 3403 // 3404 // Group right DataFrame by group columns and merge piece by piece with 3405 // left DataFrame. Must be None if either left or right are a Series. 3406 // 3407 // fill_method : {'ffill', None}, default None 3408 // 3409 // Interpolation method for data. 3410 // 3411 // suffixes : list-like, default is ("_x", "_y") 3412 // 3413 // A length-2 sequence where each element is optionally a string 3414 // indicating the suffix to add to overlapping column names in 3415 // `left` and `right` respectively. Pass a value of `None` instead 3416 // of a string to indicate that the column name from `left` or 3417 // `right` should be left as-is, with no suffix. At least one of the 3418 // values must not be None. 
3419 // 3420 // how : {'left', 'right', 'outer', 'inner'}, default 'outer' 3421 // - left: use only keys from left frame (SQL: left outer join) 3422 // - right: use only keys from right frame (SQL: right outer join) 3423 // - outer: use union of keys from both frames (SQL: full outer join) 3424 // - inner: use intersection of keys from both frames (SQL: inner join). 3425 // 3426 // Returns 3427 // ------- 3428 // DataFrame 3429 // 3430 // The merged DataFrame output type will be the same as 3431 // 'left', if it is a subclass of DataFrame. 3432 // 3433 // See Also 3434 // -------- 3435 // merge : Merge with a database-style join. 3436 // merge_asof : Merge on nearest keys. 3437 // 3438 // Examples 3439 // -------- 3440 // >>> from pandas import merge_ordered 3441 // >>> df1 = pd.DataFrame( 3442 // ... { 3443 // ... "key": ["a", "c", "e", "a", "c", "e"], 3444 // ... "lvalue": [1, 2, 3, 1, 2, 3], 3445 // ... "group": ["a", "a", "a", "b", "b", "b"] 3446 // ... } 3447 // ... ) 3448 // >>> df1 3449 // 3450 // key lvalue group 3451 // 3452 // 0 a 1 a 3453 // 1 c 2 a 3454 // 2 e 3 a 3455 // 3 a 1 b 3456 // 4 c 2 b 3457 // 5 e 3 b 3458 // 3459 // >>> df2 = pd.DataFrame({"key": ["b", "c", "d"], "rvalue": [1, 2, 3]}) 3460 // >>> df2 3461 // 3462 // key rvalue 3463 // 3464 // 0 b 1 3465 // 1 c 2 3466 // 2 d 3 3467 // 3468 // >>> merge_ordered(df1, df2, fill_method="ffill", left_by="group") 3469 // 3470 // key lvalue group rvalue 3471 // 3472 // 0 a 1 a NaN 3473 // 1 b 1 a 1.0 3474 // 2 c 2 a 2.0 3475 // 3 d 2 a 3.0 3476 // 4 e 3 a 3.0 3477 // 5 a 1 b NaN 3478 // 6 b 1 b 1.0 3479 // 7 c 2 b 2.0 3480 // 8 d 2 b 3.0 3481 // 9 e 3 b 3.0 3482 // 3483 //go:linkname MergeOrdered py.merge_ordered 3484 func MergeOrdered(left *py.Object, right *py.Object, on *py.Object, leftOn *py.Object, rightOn *py.Object, leftBy *py.Object, rightBy *py.Object, fillMethod *py.Object, suffixes *py.Object, how *py.Object) *py.Object 3485 3486 // Compute a simple cross tabulation of two (or more) factors. 3487 // 3488 // By default, computes a frequency table of the factors unless an 3489 // array of values and an aggregation function are passed. 3490 // 3491 // Parameters 3492 // ---------- 3493 // index : array-like, Series, or list of arrays/Series 3494 // 3495 // Values to group by in the rows. 3496 // 3497 // columns : array-like, Series, or list of arrays/Series 3498 // 3499 // Values to group by in the columns. 3500 // 3501 // values : array-like, optional 3502 // 3503 // Array of values to aggregate according to the factors. 3504 // Requires `aggfunc` be specified. 3505 // 3506 // rownames : sequence, default None 3507 // 3508 // If passed, must match number of row arrays passed. 3509 // 3510 // colnames : sequence, default None 3511 // 3512 // If passed, must match number of column arrays passed. 3513 // 3514 // aggfunc : function, optional 3515 // 3516 // If specified, requires `values` be specified as well. 3517 // 3518 // margins : bool, default False 3519 // 3520 // Add row/column margins (subtotals). 3521 // 3522 // margins_name : str, default 'All' 3523 // 3524 // Name of the row/column that will contain the totals 3525 // when margins is True. 3526 // 3527 // dropna : bool, default True 3528 // 3529 // Do not include columns whose entries are all NaN. 3530 // 3531 // normalize : bool, {'all', 'index', 'columns'}, or {0,1}, default False 3532 // 3533 // Normalize by dividing all values by the sum of values. 3534 // 3535 // - If passed 'all' or `True`, will normalize over all values. 
3536 // - If passed 'index' will normalize over each row. 3537 // - If passed 'columns' will normalize over each column. 3538 // - If margins is `True`, will also normalize margin values. 3539 // 3540 // Returns 3541 // ------- 3542 // DataFrame 3543 // 3544 // Cross tabulation of the data. 3545 // 3546 // See Also 3547 // -------- 3548 // DataFrame.pivot : Reshape data based on column values. 3549 // pivot_table : Create a pivot table as a DataFrame. 3550 // 3551 // Notes 3552 // ----- 3553 // Any Series passed will have their name attributes used unless row or column 3554 // names for the cross-tabulation are specified. 3555 // 3556 // Any input passed containing Categorical data will have **all** of its 3557 // categories included in the cross-tabulation, even if the actual data does 3558 // not contain any instances of a particular category. 3559 // 3560 // In the event that there aren't overlapping indexes an empty DataFrame will 3561 // be returned. 3562 // 3563 // Reference :ref:`the user guide <reshaping.crosstabulations>` for more examples. 3564 // 3565 // Examples 3566 // -------- 3567 // >>> a = np.array(["foo", "foo", "foo", "foo", "bar", "bar", 3568 // ... "bar", "bar", "foo", "foo", "foo"], dtype=object) 3569 // >>> b = np.array(["one", "one", "one", "two", "one", "one", 3570 // ... "one", "two", "two", "two", "one"], dtype=object) 3571 // >>> c = np.array(["dull", "dull", "shiny", "dull", "dull", "shiny", 3572 // ... "shiny", "dull", "shiny", "shiny", "shiny"], 3573 // ... dtype=object) 3574 // >>> pd.crosstab(a, [b, c], rownames=['a'], colnames=['b', 'c']) 3575 // b one two 3576 // c dull shiny dull shiny 3577 // a 3578 // bar 1 2 1 0 3579 // foo 2 2 1 2 3580 // 3581 // Here 'c' and 'f' are not represented in the data and will not be 3582 // shown in the output because dropna is True by default. Set 3583 // dropna=False to preserve categories with no data. 3584 // 3585 // >>> foo = pd.Categorical(['a', 'b'], categories=['a', 'b', 'c']) 3586 // >>> bar = pd.Categorical(['d', 'e'], categories=['d', 'e', 'f']) 3587 // >>> pd.crosstab(foo, bar) 3588 // col_0 d e 3589 // row_0 3590 // a 1 0 3591 // b 0 1 3592 // >>> pd.crosstab(foo, bar, dropna=False) 3593 // col_0 d e f 3594 // row_0 3595 // a 1 0 0 3596 // b 0 1 0 3597 // c 0 0 0 3598 // 3599 //go:linkname Crosstab py.crosstab 3600 func Crosstab(index *py.Object, columns *py.Object, values *py.Object, rownames *py.Object, colnames *py.Object, aggfunc *py.Object, margins *py.Object, marginsName *py.Object, dropna *py.Object, normalize *py.Object) *py.Object 3601 3602 // Return reshaped DataFrame organized by given index / column values. 3603 // 3604 // Reshape data (produce a "pivot" table) based on column values. Uses 3605 // unique values from specified `index` / `columns` to form axes of the 3606 // resulting DataFrame. This function does not support data 3607 // aggregation, multiple values will result in a MultiIndex in the 3608 // columns. See the :ref:`User Guide <reshaping>` for more on reshaping. 3609 // 3610 // Parameters 3611 // ---------- 3612 // data : DataFrame 3613 // columns : str or object or a list of str 3614 // 3615 // Column to use to make new frame's columns. 3616 // 3617 // index : str or object or a list of str, optional 3618 // 3619 // Column to use to make new frame's index. If not given, uses existing index. 3620 // 3621 // values : str, object or a list of the previous, optional 3622 // 3623 // Column(s) to use for populating new frame's values. 
If not 3624 // specified, all remaining columns will be used and the result will 3625 // have hierarchically indexed columns. 3626 // 3627 // Returns 3628 // ------- 3629 // DataFrame 3630 // 3631 // Returns reshaped DataFrame. 3632 // 3633 // Raises 3634 // ------ 3635 // ValueError: 3636 // 3637 // When there are any `index`, `columns` combinations with multiple 3638 // values. `DataFrame.pivot_table` when you need to aggregate. 3639 // 3640 // See Also 3641 // -------- 3642 // DataFrame.pivot_table : Generalization of pivot that can handle 3643 // 3644 // duplicate values for one index/column pair. 3645 // 3646 // DataFrame.unstack : Pivot based on the index values instead of a 3647 // 3648 // column. 3649 // 3650 // wide_to_long : Wide panel to long format. Less flexible but more 3651 // 3652 // user-friendly than melt. 3653 // 3654 // Notes 3655 // ----- 3656 // For finer-tuned control, see hierarchical indexing documentation along 3657 // with the related stack/unstack methods. 3658 // 3659 // Reference :ref:`the user guide <reshaping.pivot>` for more examples. 3660 // 3661 // Examples 3662 // -------- 3663 // >>> df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 3664 // ... 'two'], 3665 // ... 'bar': ['A', 'B', 'C', 'A', 'B', 'C'], 3666 // ... 'baz': [1, 2, 3, 4, 5, 6], 3667 // ... 'zoo': ['x', 'y', 'z', 'q', 'w', 't']}) 3668 // >>> df 3669 // 3670 // foo bar baz zoo 3671 // 3672 // 0 one A 1 x 3673 // 1 one B 2 y 3674 // 2 one C 3 z 3675 // 3 two A 4 q 3676 // 4 two B 5 w 3677 // 5 two C 6 t 3678 // 3679 // >>> df.pivot(index='foo', columns='bar', values='baz') 3680 // bar A B C 3681 // foo 3682 // one 1 2 3 3683 // two 4 5 6 3684 // 3685 // >>> df.pivot(index='foo', columns='bar')['baz'] 3686 // bar A B C 3687 // foo 3688 // one 1 2 3 3689 // two 4 5 6 3690 // 3691 // >>> df.pivot(index='foo', columns='bar', values=['baz', 'zoo']) 3692 // 3693 // baz zoo 3694 // 3695 // bar A B C A B C 3696 // foo 3697 // one 1 2 3 x y z 3698 // two 4 5 6 q w t 3699 // 3700 // You could also assign a list of column names or a list of index names. 3701 // 3702 // >>> df = pd.DataFrame({ 3703 // ... "lev1": [1, 1, 1, 2, 2, 2], 3704 // ... "lev2": [1, 1, 2, 1, 1, 2], 3705 // ... "lev3": [1, 2, 1, 2, 1, 2], 3706 // ... "lev4": [1, 2, 3, 4, 5, 6], 3707 // ... "values": [0, 1, 2, 3, 4, 5]}) 3708 // >>> df 3709 // 3710 // lev1 lev2 lev3 lev4 values 3711 // 3712 // 0 1 1 1 1 0 3713 // 1 1 1 2 2 1 3714 // 2 1 2 1 3 2 3715 // 3 2 1 2 4 3 3716 // 4 2 1 1 5 4 3717 // 5 2 2 2 6 5 3718 // 3719 // >>> df.pivot(index="lev1", columns=["lev2", "lev3"], values="values") 3720 // lev2 1 2 3721 // lev3 1 2 1 2 3722 // lev1 3723 // 1 0.0 1.0 2.0 NaN 3724 // 2 4.0 3.0 NaN 5.0 3725 // 3726 // >>> df.pivot(index=["lev1", "lev2"], columns=["lev3"], values="values") 3727 // 3728 // lev3 1 2 3729 // 3730 // lev1 lev2 3731 // 3732 // 1 1 0.0 1.0 3733 // 2 2.0 NaN 3734 // 2 1 4.0 3.0 3735 // 2 NaN 5.0 3736 // 3737 // A ValueError is raised if there are any duplicates. 3738 // 3739 // >>> df = pd.DataFrame({"foo": ['one', 'one', 'two', 'two'], 3740 // ... "bar": ['A', 'A', 'B', 'C'], 3741 // ... "baz": [1, 2, 3, 4]}) 3742 // >>> df 3743 // 3744 // foo bar baz 3745 // 3746 // 0 one A 1 3747 // 1 one A 2 3748 // 2 two B 3 3749 // 3 two C 4 3750 // 3751 // Notice that the first two rows are the same for our `index` 3752 // and `columns` arguments. 3753 // 3754 // >>> df.pivot(index='foo', columns='bar', values='baz') 3755 // Traceback (most recent call last): 3756 // 3757 // ... 
3758 // 3759 // ValueError: Index contains duplicate entries, cannot reshape 3760 // 3761 //go:linkname Pivot py.pivot 3762 func Pivot(data *py.Object) *py.Object 3763 3764 // Create a spreadsheet-style pivot table as a DataFrame. 3765 // 3766 // The levels in the pivot table will be stored in MultiIndex objects 3767 // (hierarchical indexes) on the index and columns of the result DataFrame. 3768 // 3769 // Parameters 3770 // ---------- 3771 // data : DataFrame 3772 // values : list-like or scalar, optional 3773 // 3774 // Column or columns to aggregate. 3775 // 3776 // index : column, Grouper, array, or list of the previous 3777 // 3778 // Keys to group by on the pivot table index. If a list is passed, 3779 // it can contain any of the other types (except list). If an array is 3780 // passed, it must be the same length as the data and will be used in 3781 // the same manner as column values. 3782 // 3783 // columns : column, Grouper, array, or list of the previous 3784 // 3785 // Keys to group by on the pivot table column. If a list is passed, 3786 // it can contain any of the other types (except list). If an array is 3787 // passed, it must be the same length as the data and will be used in 3788 // the same manner as column values. 3789 // 3790 // aggfunc : function, list of functions, dict, default "mean" 3791 // 3792 // If a list of functions is passed, the resulting pivot table will have 3793 // hierarchical columns whose top level are the function names 3794 // (inferred from the function objects themselves). 3795 // If a dict is passed, the key is column to aggregate and the value is 3796 // function or list of functions. If ``margin=True``, aggfunc will be 3797 // used to calculate the partial aggregates. 3798 // 3799 // fill_value : scalar, default None 3800 // 3801 // Value to replace missing values with (in the resulting pivot table, 3802 // after aggregation). 3803 // 3804 // margins : bool, default False 3805 // 3806 // If ``margins=True``, special ``All`` columns and rows 3807 // will be added with partial group aggregates across the categories 3808 // on the rows and columns. 3809 // 3810 // dropna : bool, default True 3811 // 3812 // Do not include columns whose entries are all NaN. If True, 3813 // rows with a NaN value in any column will be omitted before 3814 // computing margins. 3815 // 3816 // margins_name : str, default 'All' 3817 // 3818 // Name of the row / column that will contain the totals 3819 // when margins is True. 3820 // 3821 // observed : bool, default False 3822 // 3823 // This only applies if any of the groupers are Categoricals. 3824 // If True: only show observed values for categorical groupers. 3825 // If False: show all values for categorical groupers. 3826 // 3827 // .. deprecated:: 2.2.0 3828 // 3829 // The default value of ``False`` is deprecated and will change to 3830 // ``True`` in a future version of pandas. 3831 // 3832 // sort : bool, default True 3833 // 3834 // Specifies if the result should be sorted. 3835 // 3836 // .. versionadded:: 1.3.0 3837 // 3838 // Returns 3839 // ------- 3840 // DataFrame 3841 // 3842 // An Excel style pivot table. 3843 // 3844 // See Also 3845 // -------- 3846 // DataFrame.pivot : Pivot without aggregation that can handle 3847 // 3848 // non-numeric data. 3849 // 3850 // DataFrame.melt: Unpivot a DataFrame from wide to long format, 3851 // 3852 // optionally leaving identifiers set. 3853 // 3854 // wide_to_long : Wide panel to long format. Less flexible but more 3855 // 3856 // user-friendly than melt. 
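//
// Go-side usage (llgo binding, not part of the upstream pandas docstring): a
// minimal, hedged sketch of calling the Pivot and PivotTable wrappers in this
// package. Every argument is a *py.Object; here `df` stands for a DataFrame
// obtained through some other binding, `py.Str` is an assumed llgo helper for
// building a Python string, and nil is assumed to stand in for each pandas
// default that is not being set:
//
//	wide := Pivot(df) // this generated wrapper exposes only the data argument
//	table := PivotTable(df, py.Str("D"), py.Str("A"), py.Str("C"), py.Str("sum"),
//		nil, nil, nil, nil, nil, nil) // fill_value through sort left at their defaults
//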
3857 // 3858 // Notes 3859 // ----- 3860 // Reference :ref:`the user guide <reshaping.pivot>` for more examples. 3861 // 3862 // Examples 3863 // -------- 3864 // >>> df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo", 3865 // ... "bar", "bar", "bar", "bar"], 3866 // ... "B": ["one", "one", "one", "two", "two", 3867 // ... "one", "one", "two", "two"], 3868 // ... "C": ["small", "large", "large", "small", 3869 // ... "small", "large", "small", "small", 3870 // ... "large"], 3871 // ... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7], 3872 // ... "E": [2, 4, 5, 5, 6, 6, 8, 9, 9]}) 3873 // >>> df 3874 // 3875 // A B C D E 3876 // 3877 // 0 foo one small 1 2 3878 // 1 foo one large 2 4 3879 // 2 foo one large 2 5 3880 // 3 foo two small 3 5 3881 // 4 foo two small 3 6 3882 // 5 bar one large 4 6 3883 // 6 bar one small 5 8 3884 // 7 bar two small 6 9 3885 // 8 bar two large 7 9 3886 // 3887 // This first example aggregates values by taking the sum. 3888 // 3889 // >>> table = pd.pivot_table(df, values='D', index=['A', 'B'], 3890 // ... columns=['C'], aggfunc="sum") 3891 // >>> table 3892 // C large small 3893 // A B 3894 // bar one 4.0 5.0 3895 // 3896 // two 7.0 6.0 3897 // 3898 // foo one 4.0 1.0 3899 // 3900 // two NaN 6.0 3901 // 3902 // We can also fill missing values using the `fill_value` parameter. 3903 // 3904 // >>> table = pd.pivot_table(df, values='D', index=['A', 'B'], 3905 // ... columns=['C'], aggfunc="sum", fill_value=0) 3906 // >>> table 3907 // C large small 3908 // A B 3909 // bar one 4 5 3910 // 3911 // two 7 6 3912 // 3913 // foo one 4 1 3914 // 3915 // two 0 6 3916 // 3917 // The next example aggregates by taking the mean across multiple columns. 3918 // 3919 // >>> table = pd.pivot_table(df, values=['D', 'E'], index=['A', 'C'], 3920 // ... aggfunc={'D': "mean", 'E': "mean"}) 3921 // >>> table 3922 // 3923 // D E 3924 // 3925 // A C 3926 // bar large 5.500000 7.500000 3927 // 3928 // small 5.500000 8.500000 3929 // 3930 // foo large 2.000000 4.500000 3931 // 3932 // small 2.333333 4.333333 3933 // 3934 // We can also calculate multiple types of aggregations for any given 3935 // value column. 3936 // 3937 // >>> table = pd.pivot_table(df, values=['D', 'E'], index=['A', 'C'], 3938 // ... aggfunc={'D': "mean", 3939 // ... 'E': ["min", "max", "mean"]}) 3940 // >>> table 3941 // 3942 // D E 3943 // mean max mean min 3944 // 3945 // A C 3946 // bar large 5.500000 9 7.500000 6 3947 // 3948 // small 5.500000 9 8.500000 8 3949 // 3950 // foo large 2.000000 5 4.500000 4 3951 // 3952 // small 2.333333 6 4.333333 2 3953 // 3954 //go:linkname PivotTable py.pivot_table 3955 func PivotTable(data *py.Object, values *py.Object, index *py.Object, columns *py.Object, aggfunc *py.Object, fillValue *py.Object, margins *py.Object, dropna *py.Object, marginsName *py.Object, observed *py.Object, sort *py.Object) *py.Object 3956 3957 // Convert categorical variable into dummy/indicator variables. 3958 // 3959 // Each variable is converted in as many 0/1 variables as there are different 3960 // values. Columns in the output are each named after a value; if the input is 3961 // a DataFrame, the name of the original variable is prepended to the value. 3962 // 3963 // Parameters 3964 // ---------- 3965 // data : array-like, Series, or DataFrame 3966 // 3967 // Data of which to get dummy indicators. 3968 // 3969 // prefix : str, list of str, or dict of str, default None 3970 // 3971 // String to append DataFrame column names. 
3972 // Pass a list with length equal to the number of columns 3973 // when calling get_dummies on a DataFrame. Alternatively, `prefix` 3974 // can be a dictionary mapping column names to prefixes. 3975 // 3976 // prefix_sep : str, default '_' 3977 // 3978 // If appending prefix, separator/delimiter to use. Or pass a 3979 // list or dictionary as with `prefix`. 3980 // 3981 // dummy_na : bool, default False 3982 // 3983 // Add a column to indicate NaNs, if False NaNs are ignored. 3984 // 3985 // columns : list-like, default None 3986 // 3987 // Column names in the DataFrame to be encoded. 3988 // If `columns` is None then all the columns with 3989 // `object`, `string`, or `category` dtype will be converted. 3990 // 3991 // sparse : bool, default False 3992 // 3993 // Whether the dummy-encoded columns should be backed by 3994 // a :class:`SparseArray` (True) or a regular NumPy array (False). 3995 // 3996 // drop_first : bool, default False 3997 // 3998 // Whether to get k-1 dummies out of k categorical levels by removing the 3999 // first level. 4000 // 4001 // dtype : dtype, default bool 4002 // 4003 // Data type for new columns. Only a single dtype is allowed. 4004 // 4005 // Returns 4006 // ------- 4007 // DataFrame 4008 // 4009 // Dummy-coded data. If `data` contains other columns than the 4010 // dummy-coded one(s), these will be prepended, unaltered, to the result. 4011 // 4012 // See Also 4013 // -------- 4014 // Series.str.get_dummies : Convert Series of strings to dummy codes. 4015 // :func:`~pandas.from_dummies` : Convert dummy codes to categorical “DataFrame“. 4016 // 4017 // Notes 4018 // ----- 4019 // Reference :ref:`the user guide <reshaping.dummies>` for more examples. 4020 // 4021 // Examples 4022 // -------- 4023 // >>> s = pd.Series(list('abca')) 4024 // 4025 // >>> pd.get_dummies(s) 4026 // 4027 // a b c 4028 // 4029 // 0 True False False 4030 // 1 False True False 4031 // 2 False False True 4032 // 3 True False False 4033 // 4034 // >>> s1 = ['a', 'b', np.nan] 4035 // 4036 // >>> pd.get_dummies(s1) 4037 // 4038 // a b 4039 // 4040 // 0 True False 4041 // 1 False True 4042 // 2 False False 4043 // 4044 // >>> pd.get_dummies(s1, dummy_na=True) 4045 // 4046 // a b NaN 4047 // 4048 // 0 True False False 4049 // 1 False True False 4050 // 2 False False True 4051 // 4052 // >>> df = pd.DataFrame({'A': ['a', 'b', 'a'], 'B': ['b', 'a', 'c'], 4053 // ... 
'C': [1, 2, 3]}) 4054 // 4055 // >>> pd.get_dummies(df, prefix=['col1', 'col2']) 4056 // 4057 // C col1_a col1_b col2_a col2_b col2_c 4058 // 4059 // 0 1 True False False True False 4060 // 1 2 False True True False False 4061 // 2 3 True False False False True 4062 // 4063 // >>> pd.get_dummies(pd.Series(list('abcaa'))) 4064 // 4065 // a b c 4066 // 4067 // 0 True False False 4068 // 1 False True False 4069 // 2 False False True 4070 // 3 True False False 4071 // 4 True False False 4072 // 4073 // >>> pd.get_dummies(pd.Series(list('abcaa')), drop_first=True) 4074 // 4075 // b c 4076 // 4077 // 0 False False 4078 // 1 True False 4079 // 2 False True 4080 // 3 False False 4081 // 4 False False 4082 // 4083 // >>> pd.get_dummies(pd.Series(list('abc')), dtype=float) 4084 // 4085 // a b c 4086 // 4087 // 0 1.0 0.0 0.0 4088 // 1 0.0 1.0 0.0 4089 // 2 0.0 0.0 1.0 4090 // 4091 //go:linkname GetDummies py.get_dummies 4092 func GetDummies(data *py.Object, prefix *py.Object, prefixSep *py.Object, dummyNa *py.Object, columns *py.Object, sparse *py.Object, dropFirst *py.Object, dtype *py.Object) *py.Object 4093 4094 // Create a categorical “DataFrame“ from a “DataFrame“ of dummy variables. 4095 // 4096 // Inverts the operation performed by :func:`~pandas.get_dummies`. 4097 // 4098 // .. versionadded:: 1.5.0 4099 // 4100 // Parameters 4101 // ---------- 4102 // data : DataFrame 4103 // 4104 // Data which contains dummy-coded variables in form of integer columns of 4105 // 1's and 0's. 4106 // 4107 // sep : str, default None 4108 // 4109 // Separator used in the column names of the dummy categories they are 4110 // character indicating the separation of the categorical names from the prefixes. 4111 // For example, if your column names are 'prefix_A' and 'prefix_B', 4112 // you can strip the underscore by specifying sep='_'. 4113 // 4114 // default_category : None, Hashable or dict of Hashables, default None 4115 // 4116 // The default category is the implied category when a value has none of the 4117 // listed categories specified with a one, i.e. if all dummies in a row are 4118 // zero. Can be a single value for all variables or a dict directly mapping 4119 // the default categories to a prefix of a variable. 4120 // 4121 // Returns 4122 // ------- 4123 // DataFrame 4124 // 4125 // Categorical data decoded from the dummy input-data. 4126 // 4127 // Raises 4128 // ------ 4129 // ValueError 4130 // - When the input “DataFrame“ “data“ contains NA values. 4131 // - When the input “DataFrame“ “data“ contains column names with separators 4132 // that do not match the separator specified with “sep“. 4133 // - When a “dict“ passed to “default_category“ does not include an implied 4134 // category for each prefix. 4135 // - When a value in “data“ has more than one category assigned to it. 4136 // - When “default_category=None“ and a value in “data“ has no category 4137 // assigned to it. 4138 // 4139 // TypeError 4140 // - When the input “data“ is not of type “DataFrame“. 4141 // - When the input “DataFrame“ “data“ contains non-dummy data. 4142 // - When the passed “sep“ is of a wrong data type. 4143 // - When the passed “default_category“ is of a wrong data type. 4144 // 4145 // See Also 4146 // -------- 4147 // :func:`~pandas.get_dummies` : Convert “Series“ or “DataFrame“ to dummy codes. 4148 // :class:`~pandas.Categorical` : Represent a categorical variable in classic. 4149 // 4150 // Notes 4151 // ----- 4152 // The columns of the passed dummy data should only include 1's and 0's, 4153 // or boolean values. 
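//
// Go-side usage (llgo binding, not part of the upstream pandas docstring): a
// minimal, hedged sketch of a get_dummies / from_dummies round trip through
// the wrappers in this package. `s` stands for a Series or DataFrame already
// held as a *py.Object, `py.Str` is an assumed llgo helper for building a
// Python string, and nil is assumed to stand in for each pandas default:
//
//	dummies := GetDummies(s, py.Str("col"), nil, nil, nil, nil, nil, nil) // prefix="col"
//	back := FromDummies(dummies, py.Str("_"), nil)                        // sep="_" strips the prefix
//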
4154 // 4155 // Examples 4156 // -------- 4157 // >>> df = pd.DataFrame({"a": [1, 0, 0, 1], "b": [0, 1, 0, 0], 4158 // ... "c": [0, 0, 1, 0]}) 4159 // 4160 // >>> df 4161 // 4162 // a b c 4163 // 4164 // 0 1 0 0 4165 // 1 0 1 0 4166 // 2 0 0 1 4167 // 3 1 0 0 4168 // 4169 // >>> pd.from_dummies(df) 4170 // 0 a 4171 // 1 b 4172 // 2 c 4173 // 3 a 4174 // 4175 // >>> df = pd.DataFrame({"col1_a": [1, 0, 1], "col1_b": [0, 1, 0], 4176 // ... "col2_a": [0, 1, 0], "col2_b": [1, 0, 0], 4177 // ... "col2_c": [0, 0, 1]}) 4178 // 4179 // >>> df 4180 // 4181 // col1_a col1_b col2_a col2_b col2_c 4182 // 4183 // 0 1 0 0 1 0 4184 // 1 0 1 1 0 0 4185 // 2 1 0 0 0 1 4186 // 4187 // >>> pd.from_dummies(df, sep="_") 4188 // 4189 // col1 col2 4190 // 4191 // 0 a b 4192 // 1 b a 4193 // 2 a c 4194 // 4195 // >>> df = pd.DataFrame({"col1_a": [1, 0, 0], "col1_b": [0, 1, 0], 4196 // ... "col2_a": [0, 1, 0], "col2_b": [1, 0, 0], 4197 // ... "col2_c": [0, 0, 0]}) 4198 // 4199 // >>> df 4200 // 4201 // col1_a col1_b col2_a col2_b col2_c 4202 // 4203 // 0 1 0 0 1 0 4204 // 1 0 1 1 0 0 4205 // 2 0 0 0 0 0 4206 // 4207 // >>> pd.from_dummies(df, sep="_", default_category={"col1": "d", "col2": "e"}) 4208 // 4209 // col1 col2 4210 // 4211 // 0 a b 4212 // 1 b a 4213 // 2 d e 4214 // 4215 //go:linkname FromDummies py.from_dummies 4216 func FromDummies(data *py.Object, sep *py.Object, defaultCategory *py.Object) *py.Object 4217 4218 // Bin values into discrete intervals. 4219 // 4220 // Use `cut` when you need to segment and sort data values into bins. This 4221 // function is also useful for going from a continuous variable to a 4222 // categorical variable. For example, `cut` could convert ages to groups of 4223 // age ranges. Supports binning into an equal number of bins, or a 4224 // pre-specified array of bins. 4225 // 4226 // Parameters 4227 // ---------- 4228 // x : array-like 4229 // 4230 // The input array to be binned. Must be 1-dimensional. 4231 // 4232 // bins : int, sequence of scalars, or IntervalIndex 4233 // 4234 // The criteria to bin by. 4235 // 4236 // * int : Defines the number of equal-width bins in the range of `x`. The 4237 // range of `x` is extended by .1% on each side to include the minimum 4238 // and maximum values of `x`. 4239 // * sequence of scalars : Defines the bin edges allowing for non-uniform 4240 // width. No extension of the range of `x` is done. 4241 // * IntervalIndex : Defines the exact bins to be used. Note that 4242 // IntervalIndex for `bins` must be non-overlapping. 4243 // 4244 // right : bool, default True 4245 // 4246 // Indicates whether `bins` includes the rightmost edge or not. If 4247 // ``right == True`` (the default), then the `bins` ``[1, 2, 3, 4]`` 4248 // indicate (1,2], (2,3], (3,4]. This argument is ignored when 4249 // `bins` is an IntervalIndex. 4250 // 4251 // labels : array or False, default None 4252 // 4253 // Specifies the labels for the returned bins. Must be the same length as 4254 // the resulting bins. If False, returns only integer indicators of the 4255 // bins. This affects the type of the output container (see below). 4256 // This argument is ignored when `bins` is an IntervalIndex. If True, 4257 // raises an error. When `ordered=False`, labels must be provided. 4258 // 4259 // retbins : bool, default False 4260 // 4261 // Whether to return the bins or not. Useful when bins is provided 4262 // as a scalar. 4263 // 4264 // precision : int, default 3 4265 // 4266 // The precision at which to store and display the bins labels. 
4267 // 4268 // include_lowest : bool, default False 4269 // 4270 // Whether the first interval should be left-inclusive or not. 4271 // 4272 // duplicates : {default 'raise', 'drop'}, optional 4273 // 4274 // If bin edges are not unique, raise ValueError or drop non-uniques. 4275 // 4276 // ordered : bool, default True 4277 // 4278 // Whether the labels are ordered or not. Applies to returned types 4279 // Categorical and Series (with Categorical dtype). If True, 4280 // the resulting categorical will be ordered. If False, the resulting 4281 // categorical will be unordered (labels must be provided). 4282 // 4283 // Returns 4284 // ------- 4285 // out : Categorical, Series, or ndarray 4286 // 4287 // An array-like object representing the respective bin for each value 4288 // of `x`. The type depends on the value of `labels`. 4289 // 4290 // * None (default) : returns a Series for Series `x` or a 4291 // Categorical for all other inputs. The values stored within 4292 // are Interval dtype. 4293 // 4294 // * sequence of scalars : returns a Series for Series `x` or a 4295 // Categorical for all other inputs. The values stored within 4296 // are whatever the type in the sequence is. 4297 // 4298 // * False : returns an ndarray of integers. 4299 // 4300 // bins : numpy.ndarray or IntervalIndex. 4301 // 4302 // The computed or specified bins. Only returned when `retbins=True`. 4303 // For scalar or sequence `bins`, this is an ndarray with the computed 4304 // bins. If set `duplicates=drop`, `bins` will drop non-unique bin. For 4305 // an IntervalIndex `bins`, this is equal to `bins`. 4306 // 4307 // See Also 4308 // -------- 4309 // qcut : Discretize variable into equal-sized buckets based on rank 4310 // 4311 // or based on sample quantiles. 4312 // 4313 // Categorical : Array type for storing data that come from a 4314 // 4315 // fixed set of values. 4316 // 4317 // Series : One-dimensional array with axis labels (including time series). 4318 // IntervalIndex : Immutable Index implementing an ordered, sliceable set. 4319 // 4320 // Notes 4321 // ----- 4322 // Any NA values will be NA in the result. Out of bounds values will be NA in 4323 // the resulting Series or Categorical object. 4324 // 4325 // Reference :ref:`the user guide <reshaping.tile.cut>` for more examples. 4326 // 4327 // Examples 4328 // -------- 4329 // Discretize into three equal-sized bins. 4330 // 4331 // >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3) 4332 // ... # doctest: +ELLIPSIS 4333 // [(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], (5.0, 7.0], ... 4334 // Categories (3, interval[float64, right]): [(0.994, 3.0] < (3.0, 5.0] ... 4335 // 4336 // >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3, retbins=True) 4337 // ... # doctest: +ELLIPSIS 4338 // ([(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], (5.0, 7.0], ... 4339 // Categories (3, interval[float64, right]): [(0.994, 3.0] < (3.0, 5.0] ... 4340 // array([0.994, 3. , 5. , 7. ])) 4341 // 4342 // Discovers the same bins, but assign them specific labels. Notice that 4343 // the returned Categorical's categories are `labels` and is ordered. 4344 // 4345 // >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 4346 // ... 3, labels=["bad", "medium", "good"]) 4347 // ['bad', 'good', 'medium', 'medium', 'good', 'bad'] 4348 // Categories (3, object): ['bad' < 'medium' < 'good'] 4349 // 4350 // “ordered=False“ will result in unordered categories when labels are passed. 
4351 // This parameter can be used to allow non-unique labels: 4352 // 4353 // >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3, 4354 // ... labels=["B", "A", "B"], ordered=False) 4355 // ['B', 'B', 'A', 'A', 'B', 'B'] 4356 // Categories (2, object): ['A', 'B'] 4357 // 4358 // “labels=False“ implies you just want the bins back. 4359 // 4360 // >>> pd.cut([0, 1, 1, 2], bins=4, labels=False) 4361 // array([0, 1, 1, 3]) 4362 // 4363 // Passing a Series as an input returns a Series with categorical dtype: 4364 // 4365 // >>> s = pd.Series(np.array([2, 4, 6, 8, 10]), 4366 // ... index=['a', 'b', 'c', 'd', 'e']) 4367 // >>> pd.cut(s, 3) 4368 // ... # doctest: +ELLIPSIS 4369 // a (1.992, 4.667] 4370 // b (1.992, 4.667] 4371 // c (4.667, 7.333] 4372 // d (7.333, 10.0] 4373 // e (7.333, 10.0] 4374 // dtype: category 4375 // Categories (3, interval[float64, right]): [(1.992, 4.667] < (4.667, ... 4376 // 4377 // Passing a Series as an input returns a Series with mapping value. 4378 // It is used to map numerically to intervals based on bins. 4379 // 4380 // >>> s = pd.Series(np.array([2, 4, 6, 8, 10]), 4381 // ... index=['a', 'b', 'c', 'd', 'e']) 4382 // >>> pd.cut(s, [0, 2, 4, 6, 8, 10], labels=False, retbins=True, right=False) 4383 // ... # doctest: +ELLIPSIS 4384 // (a 1.0 4385 // 4386 // b 2.0 4387 // c 3.0 4388 // d 4.0 4389 // e NaN 4390 // dtype: float64, 4391 // array([ 0, 2, 4, 6, 8, 10])) 4392 // 4393 // # Use `drop` optional when bins is not unique 4394 // 4395 // >>> pd.cut(s, [0, 2, 4, 6, 10, 10], labels=False, retbins=True, 4396 // ... right=False, duplicates='drop') 4397 // ... # doctest: +ELLIPSIS 4398 // (a 1.0 4399 // 4400 // b 2.0 4401 // c 3.0 4402 // d 3.0 4403 // e NaN 4404 // dtype: float64, 4405 // array([ 0, 2, 4, 6, 10])) 4406 // 4407 // Passing an IntervalIndex for `bins` results in those categories exactly. 4408 // Notice that values not covered by the IntervalIndex are set to NaN. 0 4409 // is to the left of the first bin (which is closed on the right), and 1.5 4410 // falls between two bins. 4411 // 4412 // >>> bins = pd.IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)]) 4413 // >>> pd.cut([0, 0.5, 1.5, 2.5, 4.5], bins) 4414 // [NaN, (0.0, 1.0], NaN, (2.0, 3.0], (4.0, 5.0]] 4415 // Categories (3, interval[int64, right]): [(0, 1] < (2, 3] < (4, 5]] 4416 // 4417 //go:linkname Cut py.cut 4418 func Cut(x *py.Object, bins *py.Object, right *py.Object, labels *py.Object, retbins *py.Object, precision *py.Object, includeLowest *py.Object, duplicates *py.Object, ordered *py.Object) *py.Object 4419 4420 // Quantile-based discretization function. 4421 // 4422 // Discretize variable into equal-sized buckets based on rank or based 4423 // on sample quantiles. For example 1000 values for 10 quantiles would 4424 // produce a Categorical object indicating quantile membership for each data point. 4425 // 4426 // Parameters 4427 // ---------- 4428 // x : 1d ndarray or Series 4429 // q : int or list-like of float 4430 // 4431 // Number of quantiles. 10 for deciles, 4 for quartiles, etc. Alternately 4432 // array of quantiles, e.g. [0, .25, .5, .75, 1.] for quartiles. 4433 // 4434 // labels : array or False, default None 4435 // 4436 // Used as labels for the resulting bins. Must be of the same length as 4437 // the resulting bins. If False, return only integer indicators of the 4438 // bins. If True, raises an error. 4439 // 4440 // retbins : bool, optional 4441 // 4442 // Whether to return the (bins, labels) or not. Can be useful if bins 4443 // is given as a scalar. 
4444 // 4445 // precision : int, optional 4446 // 4447 // The precision at which to store and display the bins labels. 4448 // 4449 // duplicates : {default 'raise', 'drop'}, optional 4450 // 4451 // If bin edges are not unique, raise ValueError or drop non-uniques. 4452 // 4453 // Returns 4454 // ------- 4455 // out : Categorical or Series or array of integers if labels is False 4456 // 4457 // The return type (Categorical or Series) depends on the input: a Series 4458 // of type category if input is a Series else Categorical. Bins are 4459 // represented as categories when categorical data is returned. 4460 // 4461 // bins : ndarray of floats 4462 // 4463 // Returned only if `retbins` is True. 4464 // 4465 // Notes 4466 // ----- 4467 // Out of bounds values will be NA in the resulting Categorical object 4468 // 4469 // Examples 4470 // -------- 4471 // >>> pd.qcut(range(5), 4) 4472 // ... # doctest: +ELLIPSIS 4473 // [(-0.001, 1.0], (-0.001, 1.0], (1.0, 2.0], (2.0, 3.0], (3.0, 4.0]] 4474 // Categories (4, interval[float64, right]): [(-0.001, 1.0] < (1.0, 2.0] ... 4475 // 4476 // >>> pd.qcut(range(5), 3, labels=["good", "medium", "bad"]) 4477 // ... # doctest: +SKIP 4478 // [good, good, medium, bad, bad] 4479 // Categories (3, object): [good < medium < bad] 4480 // 4481 // >>> pd.qcut(range(5), 4, labels=False) 4482 // array([0, 0, 1, 2, 3]) 4483 // 4484 //go:linkname Qcut py.qcut 4485 func Qcut(x *py.Object, q *py.Object, labels *py.Object, retbins *py.Object, precision *py.Object, duplicates *py.Object) *py.Object 4486 4487 // Read a table of fixed-width formatted lines into DataFrame. 4488 // 4489 // Also supports optionally iterating or breaking of the file 4490 // into chunks. 4491 // 4492 // Additional help can be found in the `online docs for IO Tools 4493 // <https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html>`_. 4494 // 4495 // Parameters 4496 // ---------- 4497 // filepath_or_buffer : str, path object, or file-like object 4498 // 4499 // String, path object (implementing ``os.PathLike[str]``), or file-like 4500 // object implementing a text ``read()`` function.The string could be a URL. 4501 // Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is 4502 // expected. A local file could be: 4503 // ``file://localhost/path/to/table.csv``. 4504 // 4505 // colspecs : list of tuple (int, int) or 'infer'. optional 4506 // 4507 // A list of tuples giving the extents of the fixed-width 4508 // fields of each line as half-open intervals (i.e., [from, to[ ). 4509 // String value 'infer' can be used to instruct the parser to try 4510 // detecting the column specifications from the first 100 rows of 4511 // the data which are not being skipped via skiprows (default='infer'). 4512 // 4513 // widths : list of int, optional 4514 // 4515 // A list of field widths which can be used instead of 'colspecs' if 4516 // the intervals are contiguous. 4517 // 4518 // infer_nrows : int, default 100 4519 // 4520 // The number of rows to consider when letting the parser determine the 4521 // `colspecs`. 4522 // 4523 // dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable' 4524 // 4525 // Back-end data type applied to the resultant :class:`DataFrame` 4526 // (still experimental). Behaviour is as follows: 4527 // 4528 // * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` 4529 // (default). 4530 // * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` 4531 // DataFrame. 4532 // 4533 // .. 
versionadded:: 2.0 4534 // 4535 // **kwds : optional 4536 // 4537 // Optional keyword arguments can be passed to ``TextFileReader``. 4538 // 4539 // Returns 4540 // ------- 4541 // DataFrame or TextFileReader 4542 // 4543 // A comma-separated values (csv) file is returned as two-dimensional 4544 // data structure with labeled axes. 4545 // 4546 // See Also 4547 // -------- 4548 // DataFrame.to_csv : Write DataFrame to a comma-separated values (csv) file. 4549 // read_csv : Read a comma-separated values (csv) file into DataFrame. 4550 // 4551 // Examples 4552 // -------- 4553 // >>> pd.read_fwf('data.csv') # doctest: +SKIP 4554 // 4555 //go:linkname ReadFwf py.read_fwf 4556 func ReadFwf(filepathOrBuffer *py.Object) *py.Object 4557 4558 // Read general delimited file into DataFrame. 4559 // 4560 // Also supports optionally iterating or breaking of the file 4561 // into chunks. 4562 // 4563 // Additional help can be found in the online docs for 4564 // `IO Tools <https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html>`_. 4565 // 4566 // Parameters 4567 // ---------- 4568 // filepath_or_buffer : str, path object or file-like object 4569 // 4570 // Any valid string path is acceptable. The string could be a URL. Valid 4571 // URL schemes include http, ftp, s3, gs, and file. For file URLs, a host is 4572 // expected. A local file could be: file://localhost/path/to/table.csv. 4573 // 4574 // If you want to pass in a path object, pandas accepts any ``os.PathLike``. 4575 // 4576 // By file-like object, we refer to objects with a ``read()`` method, such as 4577 // a file handle (e.g. via builtin ``open`` function) or ``StringIO``. 4578 // 4579 // sep : str, default '\\t' (tab-stop) 4580 // 4581 // Character or regex pattern to treat as the delimiter. If ``sep=None``, the 4582 // C engine cannot automatically detect 4583 // the separator, but the Python parsing engine can, meaning the latter will 4584 // be used and automatically detect the separator from only the first valid 4585 // row of the file by Python's builtin sniffer tool, ``csv.Sniffer``. 4586 // In addition, separators longer than 1 character and different from 4587 // ``'\s+'`` will be interpreted as regular expressions and will also force 4588 // the use of the Python parsing engine. Note that regex delimiters are prone 4589 // to ignoring quoted data. Regex example: ``'\r\t'``. 4590 // 4591 // delimiter : str, optional 4592 // 4593 // Alias for ``sep``. 4594 // 4595 // header : int, Sequence of int, 'infer' or None, default 'infer' 4596 // 4597 // Row number(s) containing column labels and marking the start of the 4598 // data (zero-indexed). Default behavior is to infer the column names: if no ``names`` 4599 // are passed the behavior is identical to ``header=0`` and column 4600 // names are inferred from the first line of the file, if column 4601 // names are passed explicitly to ``names`` then the behavior is identical to 4602 // ``header=None``. Explicitly pass ``header=0`` to be able to 4603 // replace existing names. The header can be a list of integers that 4604 // specify row locations for a :class:`~pandas.MultiIndex` on the columns 4605 // e.g. ``[0, 1, 3]``. Intervening rows that are not specified will be 4606 // skipped (e.g. 2 in this example is skipped). Note that this 4607 // parameter ignores commented lines and empty lines if 4608 // ``skip_blank_lines=True``, so ``header=0`` denotes the first line of 4609 // data rather than the first line of the file. 
4610 // 4611 // names : Sequence of Hashable, optional 4612 // 4613 // Sequence of column labels to apply. If the file contains a header row, 4614 // then you should explicitly pass ``header=0`` to override the column names. 4615 // Duplicates in this list are not allowed. 4616 // 4617 // index_col : Hashable, Sequence of Hashable or False, optional 4618 // 4619 // Column(s) to use as row label(s), denoted either by column labels or column 4620 // indices. If a sequence of labels or indices is given, :class:`~pandas.MultiIndex` 4621 // will be formed for the row labels. 4622 // 4623 // Note: ``index_col=False`` can be used to force pandas to *not* use the first 4624 // column as the index, e.g., when you have a malformed file with delimiters at 4625 // the end of each line. 4626 // 4627 // usecols : Sequence of Hashable or Callable, optional 4628 // 4629 // Subset of columns to select, denoted either by column labels or column indices. 4630 // If list-like, all elements must either 4631 // be positional (i.e. integer indices into the document columns) or strings 4632 // that correspond to column names provided either by the user in ``names`` or 4633 // inferred from the document header row(s). If ``names`` are given, the document 4634 // header row(s) are not taken into account. For example, a valid list-like 4635 // ``usecols`` parameter would be ``[0, 1, 2]`` or ``['foo', 'bar', 'baz']``. 4636 // Element order is ignored, so ``usecols=[0, 1]`` is the same as ``[1, 0]``. 4637 // To instantiate a :class:`~pandas.DataFrame` from ``data`` with element order 4638 // preserved use ``pd.read_csv(data, usecols=['foo', 'bar'])[['foo', 'bar']]`` 4639 // for columns in ``['foo', 'bar']`` order or 4640 // ``pd.read_csv(data, usecols=['foo', 'bar'])[['bar', 'foo']]`` 4641 // for ``['bar', 'foo']`` order. 4642 // 4643 // If callable, the callable function will be evaluated against the column 4644 // names, returning names where the callable function evaluates to ``True``. An 4645 // example of a valid callable argument would be ``lambda x: x.upper() in 4646 // ['AAA', 'BBB', 'DDD']``. Using this parameter results in much faster 4647 // parsing time and lower memory usage. 4648 // 4649 // dtype : dtype or dict of {Hashable : dtype}, optional 4650 // 4651 // Data type(s) to apply to either the whole dataset or individual columns. 4652 // E.g., ``{'a': np.float64, 'b': np.int32, 'c': 'Int64'}`` 4653 // Use ``str`` or ``object`` together with suitable ``na_values`` settings 4654 // to preserve and not interpret ``dtype``. 4655 // If ``converters`` are specified, they will be applied INSTEAD 4656 // of ``dtype`` conversion. 4657 // 4658 // .. versionadded:: 1.5.0 4659 // 4660 // Support for ``defaultdict`` was added. Specify a ``defaultdict`` as input where 4661 // the default determines the ``dtype`` of the columns which are not explicitly 4662 // listed. 4663 // 4664 // engine : {'c', 'python', 'pyarrow'}, optional 4665 // 4666 // Parser engine to use. The C and pyarrow engines are faster, while the python engine 4667 // is currently more feature-complete. Multithreading is currently only supported by 4668 // the pyarrow engine. 4669 // 4670 // .. versionadded:: 1.4.0 4671 // 4672 // The 'pyarrow' engine was added as an *experimental* engine, and some features 4673 // are unsupported, or may not work correctly, with this engine. 4674 // 4675 // converters : dict of {Hashable : Callable}, optional 4676 // 4677 // Functions for converting values in specified columns. 
Keys can either 4678 // be column labels or column indices. 4679 // 4680 // true_values : list, optional 4681 // 4682 // Values to consider as ``True`` in addition to case-insensitive variants of 'True'. 4683 // 4684 // false_values : list, optional 4685 // 4686 // Values to consider as ``False`` in addition to case-insensitive variants of 'False'. 4687 // 4688 // skipinitialspace : bool, default False 4689 // 4690 // Skip spaces after delimiter. 4691 // 4692 // skiprows : int, list of int or Callable, optional 4693 // 4694 // Line numbers to skip (0-indexed) or number of lines to skip (``int``) 4695 // at the start of the file. 4696 // 4697 // If callable, the callable function will be evaluated against the row 4698 // indices, returning ``True`` if the row should be skipped and ``False`` otherwise. 4699 // An example of a valid callable argument would be ``lambda x: x in [0, 2]``. 4700 // 4701 // skipfooter : int, default 0 4702 // 4703 // Number of lines at bottom of file to skip (Unsupported with ``engine='c'``). 4704 // 4705 // nrows : int, optional 4706 // 4707 // Number of rows of file to read. Useful for reading pieces of large files. 4708 // 4709 // na_values : Hashable, Iterable of Hashable or dict of {Hashable : Iterable}, optional 4710 // 4711 // Additional strings to recognize as ``NA``/``NaN``. If ``dict`` passed, specific 4712 // per-column ``NA`` values. By default the following values are interpreted as 4713 // ``NaN``: " ", "#N/A", "#N/A N/A", "#NA", "-1.#IND", "-1.#QNAN", "-NaN", "-nan", 4714 // "1.#IND", "1.#QNAN", "<NA>", "N/A", "NA", "NULL", "NaN", "None", 4715 // "n/a", "nan", "null ". 4716 // 4717 // keep_default_na : bool, default True 4718 // 4719 // Whether or not to include the default ``NaN`` values when parsing the data. 4720 // Depending on whether ``na_values`` is passed in, the behavior is as follows: 4721 // 4722 // * If ``keep_default_na`` is ``True``, and ``na_values`` are specified, ``na_values`` 4723 // is appended to the default ``NaN`` values used for parsing. 4724 // * If ``keep_default_na`` is ``True``, and ``na_values`` are not specified, only 4725 // the default ``NaN`` values are used for parsing. 4726 // * If ``keep_default_na`` is ``False``, and ``na_values`` are specified, only 4727 // the ``NaN`` values specified ``na_values`` are used for parsing. 4728 // * If ``keep_default_na`` is ``False``, and ``na_values`` are not specified, no 4729 // strings will be parsed as ``NaN``. 4730 // 4731 // Note that if ``na_filter`` is passed in as ``False``, the ``keep_default_na`` and 4732 // ``na_values`` parameters will be ignored. 4733 // 4734 // na_filter : bool, default True 4735 // 4736 // Detect missing value markers (empty strings and the value of ``na_values``). In 4737 // data without any ``NA`` values, passing ``na_filter=False`` can improve the 4738 // performance of reading a large file. 4739 // 4740 // verbose : bool, default False 4741 // 4742 // Indicate number of ``NA`` values placed in non-numeric columns. 4743 // 4744 // .. deprecated:: 2.2.0 4745 // 4746 // skip_blank_lines : bool, default True 4747 // 4748 // If ``True``, skip over blank lines rather than interpreting as ``NaN`` values. 4749 // 4750 // parse_dates : bool, list of Hashable, list of lists or dict of {Hashable : list}, default False 4751 // 4752 // The behavior is as follows: 4753 // 4754 // * ``bool``. If ``True`` -> try parsing the index. Note: Automatically set to 4755 // ``True`` if ``date_format`` or ``date_parser`` arguments have been passed. 
4756 // * ``list`` of ``int`` or names. e.g. If ``[1, 2, 3]`` -> try parsing columns 1, 2, 3 4757 // each as a separate date column. 4758 // * ``list`` of ``list``. e.g. If ``[[1, 3]]`` -> combine columns 1 and 3 and parse 4759 // as a single date column. Values are joined with a space before parsing. 4760 // * ``dict``, e.g. ``{'foo' : [1, 3]}`` -> parse columns 1, 3 as date and call 4761 // result 'foo'. Values are joined with a space before parsing. 4762 // 4763 // If a column or index cannot be represented as an array of ``datetime``, 4764 // say because of an unparsable value or a mixture of timezones, the column 4765 // or index will be returned unaltered as an ``object`` data type. For 4766 // non-standard ``datetime`` parsing, use :func:`~pandas.to_datetime` after 4767 // :func:`~pandas.read_csv`. 4768 // 4769 // Note: A fast-path exists for iso8601-formatted dates. 4770 // 4771 // infer_datetime_format : bool, default False 4772 // 4773 // If ``True`` and ``parse_dates`` is enabled, pandas will attempt to infer the 4774 // format of the ``datetime`` strings in the columns, and if it can be inferred, 4775 // switch to a faster method of parsing them. In some cases this can increase 4776 // the parsing speed by 5-10x. 4777 // 4778 // .. deprecated:: 2.0.0 4779 // A strict version of this argument is now the default, passing it has no effect. 4780 // 4781 // keep_date_col : bool, default False 4782 // 4783 // If ``True`` and ``parse_dates`` specifies combining multiple columns then 4784 // keep the original columns. 4785 // 4786 // date_parser : Callable, optional 4787 // 4788 // Function to use for converting a sequence of string columns to an array of 4789 // ``datetime`` instances. The default uses ``dateutil.parser.parser`` to do the 4790 // conversion. pandas will try to call ``date_parser`` in three different ways, 4791 // advancing to the next if an exception occurs: 1) Pass one or more arrays 4792 // (as defined by ``parse_dates``) as arguments; 2) concatenate (row-wise) the 4793 // string values from the columns defined by ``parse_dates`` into a single array 4794 // and pass that; and 3) call ``date_parser`` once for each row using one or 4795 // more strings (corresponding to the columns defined by ``parse_dates``) as 4796 // arguments. 4797 // 4798 // .. deprecated:: 2.0.0 4799 // Use ``date_format`` instead, or read in as ``object`` and then apply 4800 // :func:`~pandas.to_datetime` as-needed. 4801 // 4802 // date_format : str or dict of column -> format, optional 4803 // 4804 // Format to use for parsing dates when used in conjunction with ``parse_dates``. 4805 // The strftime to parse time, e.g. :const:`"%d/%m/%Y"`. See 4806 // `strftime documentation 4807 // <https://docs.python.org/3/library/datetime.html 4808 // #strftime-and-strptime-behavior>`_ for more information on choices, though 4809 // note that :const:`"%f"` will parse all the way up to nanoseconds. 4810 // You can also pass: 4811 // 4812 // - "ISO8601", to parse any `ISO8601 <https://en.wikipedia.org/wiki/ISO_8601>`_ 4813 // time string (not necessarily in exactly the same format); 4814 // - "mixed", to infer the format for each element individually. This is risky, 4815 // and you should probably use it along with `dayfirst`. 4816 // 4817 // .. versionadded:: 2.0.0 4818 // 4819 // dayfirst : bool, default False 4820 // 4821 // DD/MM format dates, international and European format. 
4822 // 4823 // cache_dates : bool, default True 4824 // 4825 // If ``True``, use a cache of unique, converted dates to apply the ``datetime`` 4826 // conversion. May produce significant speed-up when parsing duplicate 4827 // date strings, especially ones with timezone offsets. 4828 // 4829 // iterator : bool, default False 4830 // 4831 // Return ``TextFileReader`` object for iteration or getting chunks with 4832 // ``get_chunk()``. 4833 // 4834 // chunksize : int, optional 4835 // 4836 // Number of lines to read from the file per chunk. Passing a value will cause the 4837 // function to return a ``TextFileReader`` object for iteration. 4838 // See the `IO Tools docs 4839 // <https://pandas.pydata.org/pandas-docs/stable/io.html#io-chunking>`_ 4840 // for more information on ``iterator`` and ``chunksize``. 4841 // 4842 // compression : str or dict, default 'infer' 4843 // 4844 // For on-the-fly decompression of on-disk data. If 'infer' and 'filepath_or_buffer' is 4845 // path-like, then detect compression from the following extensions: '.gz', 4846 // '.bz2', '.zip', '.xz', '.zst', '.tar', '.tar.gz', '.tar.xz' or '.tar.bz2' 4847 // (otherwise no compression). 4848 // If using 'zip' or 'tar', the ZIP file must contain only one data file to be read in. 4849 // Set to ``None`` for no decompression. 4850 // Can also be a dict with key ``'method'`` set 4851 // to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, ``'tar'``} and 4852 // other key-value pairs are forwarded to 4853 // ``zipfile.ZipFile``, ``gzip.GzipFile``, 4854 // ``bz2.BZ2File``, ``zstandard.ZstdDecompressor``, ``lzma.LZMAFile`` or 4855 // ``tarfile.TarFile``, respectively. 4856 // As an example, the following could be passed for Zstandard decompression using a 4857 // custom compression dictionary: 4858 // ``compression={'method': 'zstd', 'dict_data': my_compression_dict}``. 4859 // 4860 // .. versionadded:: 1.5.0 4861 // Added support for `.tar` files. 4862 // 4863 // .. versionchanged:: 1.4.0 Zstandard support. 4864 // 4865 // thousands : str (length 1), optional 4866 // 4867 // Character acting as the thousands separator in numerical values. 4868 // 4869 // decimal : str (length 1), default '.' 4870 // 4871 // Character to recognize as decimal point (e.g., use ',' for European data). 4872 // 4873 // lineterminator : str (length 1), optional 4874 // 4875 // Character used to denote a line break. Only valid with C parser. 4876 // 4877 // quotechar : str (length 1), optional 4878 // 4879 // Character used to denote the start and end of a quoted item. Quoted 4880 // items can include the ``delimiter`` and it will be ignored. 4881 // 4882 // quoting : {0 or csv.QUOTE_MINIMAL, 1 or csv.QUOTE_ALL, 2 or csv.QUOTE_NONNUMERIC, 3 or csv.QUOTE_NONE}, default csv.QUOTE_MINIMAL 4883 // 4884 // Control field quoting behavior per ``csv.QUOTE_*`` constants. Default is 4885 // ``csv.QUOTE_MINIMAL`` (i.e., 0) which implies that only fields containing special 4886 // characters are quoted (e.g., characters defined in ``quotechar``, ``delimiter``, 4887 // or ``lineterminator``. 4888 // 4889 // doublequote : bool, default True 4890 // 4891 // When ``quotechar`` is specified and ``quoting`` is not ``QUOTE_NONE``, indicate 4892 // whether or not to interpret two consecutive ``quotechar`` elements INSIDE a 4893 // field as a single ``quotechar`` element. 4894 // 4895 // escapechar : str (length 1), optional 4896 // 4897 // Character used to escape other characters. 
4898 // 4899 // comment : str (length 1), optional 4900 // 4901 // Character indicating that the remainder of line should not be parsed. 4902 // If found at the beginning 4903 // of a line, the line will be ignored altogether. This parameter must be a 4904 // single character. Like empty lines (as long as ``skip_blank_lines=True``), 4905 // fully commented lines are ignored by the parameter ``header`` but not by 4906 // ``skiprows``. For example, if ``comment='#'``, parsing 4907 // ``#empty\na,b,c\n1,2,3`` with ``header=0`` will result in ``'a,b,c'`` being 4908 // treated as the header. 4909 // 4910 // encoding : str, optional, default 'utf-8' 4911 // 4912 // Encoding to use for UTF when reading/writing (ex. ``'utf-8'``). `List of Python 4913 // standard encodings 4914 // <https://docs.python.org/3/library/codecs.html#standard-encodings>`_ . 4915 // 4916 // encoding_errors : str, optional, default 'strict' 4917 // 4918 // How encoding errors are treated. `List of possible values 4919 // <https://docs.python.org/3/library/codecs.html#error-handlers>`_ . 4920 // 4921 // .. versionadded:: 1.3.0 4922 // 4923 // dialect : str or csv.Dialect, optional 4924 // 4925 // If provided, this parameter will override values (default or not) for the 4926 // following parameters: ``delimiter``, ``doublequote``, ``escapechar``, 4927 // ``skipinitialspace``, ``quotechar``, and ``quoting``. If it is necessary to 4928 // override values, a ``ParserWarning`` will be issued. See ``csv.Dialect`` 4929 // documentation for more details. 4930 // 4931 // on_bad_lines : {'error', 'warn', 'skip'} or Callable, default 'error' 4932 // 4933 // Specifies what to do upon encountering a bad line (a line with too many fields). 4934 // Allowed values are : 4935 // 4936 // - ``'error'``, raise an Exception when a bad line is encountered. 4937 // - ``'warn'``, raise a warning when a bad line is encountered and skip that line. 4938 // - ``'skip'``, skip bad lines without raising or warning when they are encountered. 4939 // 4940 // .. versionadded:: 1.3.0 4941 // 4942 // .. versionadded:: 1.4.0 4943 // 4944 // - Callable, function with signature 4945 // ``(bad_line: list[str]) -> list[str] | None`` that will process a single 4946 // bad line. ``bad_line`` is a list of strings split by the ``sep``. 4947 // If the function returns ``None``, the bad line will be ignored. 4948 // If the function returns a new ``list`` of strings with more elements than 4949 // expected, a ``ParserWarning`` will be emitted while dropping extra elements. 4950 // Only supported when ``engine='python'`` 4951 // 4952 // .. versionchanged:: 2.2.0 4953 // 4954 // - Callable, function with signature 4955 // as described in `pyarrow documentation 4956 // <https://arrow.apache.org/docs/python/generated/pyarrow.csv.ParseOptions.html 4957 // #pyarrow.csv.ParseOptions.invalid_row_handler>`_ when ``engine='pyarrow'`` 4958 // 4959 // delim_whitespace : bool, default False 4960 // 4961 // Specifies whether or not whitespace (e.g. ``' '`` or ``'\t'``) will be 4962 // used as the ``sep`` delimiter. Equivalent to setting ``sep='\s+'``. If this option 4963 // is set to ``True``, nothing should be passed in for the ``delimiter`` 4964 // parameter. 4965 // 4966 // .. deprecated:: 2.2.0 4967 // Use ``sep="\s+"`` instead. 4968 // 4969 // low_memory : bool, default True 4970 // 4971 // Internally process the file in chunks, resulting in lower memory use 4972 // while parsing, but possibly mixed type inference. 
To ensure no mixed 4973 // types either set ``False``, or specify the type with the ``dtype`` parameter. 4974 // Note that the entire file is read into a single :class:`~pandas.DataFrame` 4975 // regardless, use the ``chunksize`` or ``iterator`` parameter to return the data in 4976 // chunks. (Only valid with C parser). 4977 // 4978 // memory_map : bool, default False 4979 // 4980 // If a filepath is provided for ``filepath_or_buffer``, map the file object 4981 // directly onto memory and access the data directly from there. Using this 4982 // option can improve performance because there is no longer any I/O overhead. 4983 // 4984 // float_precision : {'high', 'legacy', 'round_trip'}, optional 4985 // 4986 // Specifies which converter the C engine should use for floating-point 4987 // values. The options are ``None`` or ``'high'`` for the ordinary converter, 4988 // ``'legacy'`` for the original lower precision pandas converter, and 4989 // ``'round_trip'`` for the round-trip converter. 4990 // 4991 // storage_options : dict, optional 4992 // 4993 // Extra options that make sense for a particular storage connection, e.g. 4994 // host, port, username, password, etc. For HTTP(S) URLs the key-value pairs 4995 // are forwarded to ``urllib.request.Request`` as header options. For other 4996 // URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are 4997 // forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more 4998 // details, and for more examples on storage options refer `here 4999 // <https://pandas.pydata.org/docs/user_guide/io.html? 5000 // highlight=storage_options#reading-writing-remote-files>`_. 5001 // 5002 // dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable' 5003 // 5004 // Back-end data type applied to the resultant :class:`DataFrame` 5005 // (still experimental). Behaviour is as follows: 5006 // 5007 // * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` 5008 // (default). 5009 // * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` 5010 // DataFrame. 5011 // 5012 // .. versionadded:: 2.0 5013 // 5014 // Returns 5015 // ------- 5016 // DataFrame or TextFileReader 5017 // 5018 // A comma-separated values (csv) file is returned as two-dimensional 5019 // data structure with labeled axes. 5020 // 5021 // See Also 5022 // -------- 5023 // DataFrame.to_csv : Write DataFrame to a comma-separated values (csv) file. 5024 // read_csv : Read a comma-separated values (csv) file into DataFrame. 5025 // read_fwf : Read a table of fixed-width formatted lines into DataFrame. 5026 // 5027 // Examples 5028 // -------- 5029 // >>> pd.read_table('data.csv') # doctest: +SKIP 5030 // 5031 //go:linkname ReadTable py.read_table 5032 func ReadTable(filepathOrBuffer *py.Object) *py.Object 5033 5034 // Load pickled pandas object (or any object) from file. 5035 // 5036 // .. warning:: 5037 // 5038 // Loading pickled data received from untrusted sources can be 5039 // unsafe. See `here <https://docs.python.org/3/library/pickle.html>`__. 5040 // 5041 // Parameters 5042 // ---------- 5043 // filepath_or_buffer : str, path object, or file-like object 5044 // 5045 // String, path object (implementing ``os.PathLike[str]``), or file-like 5046 // object implementing a binary ``readlines()`` function. 5047 // Also accepts URL. URL is not limited to S3 and GCS. 5048 // 5049 // compression : str or dict, default 'infer' 5050 // 5051 // For on-the-fly decompression of on-disk data. 
If 'infer' and 'filepath_or_buffer' is 5052 // path-like, then detect compression from the following extensions: '.gz', 5053 // '.bz2', '.zip', '.xz', '.zst', '.tar', '.tar.gz', '.tar.xz' or '.tar.bz2' 5054 // (otherwise no compression). 5055 // If using 'zip' or 'tar', the ZIP file must contain only one data file to be read in. 5056 // Set to ``None`` for no decompression. 5057 // Can also be a dict with key ``'method'`` set 5058 // to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, ``'tar'``} and 5059 // other key-value pairs are forwarded to 5060 // ``zipfile.ZipFile``, ``gzip.GzipFile``, 5061 // ``bz2.BZ2File``, ``zstandard.ZstdDecompressor``, ``lzma.LZMAFile`` or 5062 // ``tarfile.TarFile``, respectively. 5063 // As an example, the following could be passed for Zstandard decompression using a 5064 // custom compression dictionary: 5065 // ``compression={'method': 'zstd', 'dict_data': my_compression_dict}``. 5066 // 5067 // .. versionadded:: 1.5.0 5068 // Added support for `.tar` files. 5069 // 5070 // .. versionchanged:: 1.4.0 Zstandard support. 5071 // 5072 // storage_options : dict, optional 5073 // 5074 // Extra options that make sense for a particular storage connection, e.g. 5075 // host, port, username, password, etc. For HTTP(S) URLs the key-value pairs 5076 // are forwarded to ``urllib.request.Request`` as header options. For other 5077 // URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are 5078 // forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more 5079 // details, and for more examples on storage options refer `here 5080 // <https://pandas.pydata.org/docs/user_guide/io.html? 5081 // highlight=storage_options#reading-writing-remote-files>`_. 5082 // 5083 // Returns 5084 // ------- 5085 // same type as object stored in file 5086 // 5087 // See Also 5088 // -------- 5089 // DataFrame.to_pickle : Pickle (serialize) DataFrame object to file. 5090 // Series.to_pickle : Pickle (serialize) Series object to file. 5091 // read_hdf : Read HDF5 file into a DataFrame. 5092 // read_sql : Read SQL query or database table into a DataFrame. 5093 // read_parquet : Load a parquet object, returning a DataFrame. 5094 // 5095 // Notes 5096 // ----- 5097 // read_pickle is only guaranteed to be backwards compatible to pandas 0.20.3 5098 // provided the object was serialized with to_pickle. 5099 // 5100 // Examples 5101 // -------- 5102 // >>> original_df = pd.DataFrame( 5103 // ... {"foo": range(5), "bar": range(5, 10)} 5104 // ... ) # doctest: +SKIP 5105 // >>> original_df # doctest: +SKIP 5106 // 5107 // foo bar 5108 // 5109 // 0 0 5 5110 // 1 1 6 5111 // 2 2 7 5112 // 3 3 8 5113 // 4 4 9 5114 // >>> pd.to_pickle(original_df, "./dummy.pkl") # doctest: +SKIP 5115 // 5116 // >>> unpickled_df = pd.read_pickle("./dummy.pkl") # doctest: +SKIP 5117 // >>> unpickled_df # doctest: +SKIP 5118 // 5119 // foo bar 5120 // 5121 // 0 0 5 5122 // 1 1 6 5123 // 2 2 7 5124 // 3 3 8 5125 // 4 4 9 5126 // 5127 //go:linkname ReadPickle py.read_pickle 5128 func ReadPickle(filepathOrBuffer *py.Object, compression *py.Object, storageOptions *py.Object) *py.Object 5129 5130 // Pickle (serialize) object to file. 5131 // 5132 // Parameters 5133 // ---------- 5134 // obj : any object 5135 // 5136 // Any python object. 5137 // 5138 // filepath_or_buffer : str, path object, or file-like object 5139 // 5140 // String, path object (implementing ``os.PathLike[str]``), or file-like 5141 // object implementing a binary ``write()`` function. 5142 // Also accepts URL. 
URL has to be of S3 or GCS. 5143 // 5144 // compression : str or dict, default 'infer' 5145 // 5146 // For on-the-fly compression of the output data. If 'infer' and 'filepath_or_buffer' is 5147 // path-like, then detect compression from the following extensions: '.gz', 5148 // '.bz2', '.zip', '.xz', '.zst', '.tar', '.tar.gz', '.tar.xz' or '.tar.bz2' 5149 // (otherwise no compression). 5150 // Set to ``None`` for no compression. 5151 // Can also be a dict with key ``'method'`` set 5152 // to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, ``'tar'``} and 5153 // other key-value pairs are forwarded to 5154 // ``zipfile.ZipFile``, ``gzip.GzipFile``, 5155 // ``bz2.BZ2File``, ``zstandard.ZstdCompressor``, ``lzma.LZMAFile`` or 5156 // ``tarfile.TarFile``, respectively. 5157 // As an example, the following could be passed for faster compression and to create 5158 // a reproducible gzip archive: 5159 // ``compression={'method': 'gzip', 'compresslevel': 1, 'mtime': 1}``. 5160 // 5161 // .. versionadded:: 1.5.0 5162 // Added support for `.tar` files. 5163 // 5164 // .. versionchanged:: 1.4.0 Zstandard support. 5165 // 5166 // protocol : int 5167 // 5168 // Int which indicates which protocol should be used by the pickler, 5169 // default HIGHEST_PROTOCOL (see [1], paragraph 12.1.2). The possible 5170 // values for this parameter depend on the version of Python. For Python 5171 // 2.x, possible values are 0, 1, 2. For Python>=3.0, 3 is a valid value. 5172 // For Python >= 3.4, 4 is a valid value. A negative value for the 5173 // protocol parameter is equivalent to setting its value to 5174 // HIGHEST_PROTOCOL. 5175 // 5176 // storage_options : dict, optional 5177 // 5178 // Extra options that make sense for a particular storage connection, e.g. 5179 // host, port, username, password, etc. For HTTP(S) URLs the key-value pairs 5180 // are forwarded to ``urllib.request.Request`` as header options. For other 5181 // URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are 5182 // forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more 5183 // details, and for more examples on storage options refer `here 5184 // <https://pandas.pydata.org/docs/user_guide/io.html? 5185 // highlight=storage_options#reading-writing-remote-files>`_. 5186 // 5187 // .. [1] https://docs.python.org/3/library/pickle.html 5188 // 5189 // See Also 5190 // -------- 5191 // read_pickle : Load pickled pandas object (or any object) from file. 5192 // DataFrame.to_hdf : Write DataFrame to an HDF5 file. 5193 // DataFrame.to_sql : Write DataFrame to a SQL database. 5194 // DataFrame.to_parquet : Write a DataFrame to the binary parquet format. 5195 // 5196 // Examples 5197 // -------- 5198 // >>> original_df = pd.DataFrame({"foo": range(5), "bar": range(5, 10)}) # doctest: +SKIP 5199 // >>> original_df # doctest: +SKIP 5200 // 5201 // foo bar 5202 // 5203 // 0 0 5 5204 // 1 1 6 5205 // 2 2 7 5206 // 3 3 8 5207 // 4 4 9 5208 // >>> pd.to_pickle(original_df, "./dummy.pkl") # doctest: +SKIP 5209 // 5210 // >>> unpickled_df = pd.read_pickle("./dummy.pkl") # doctest: +SKIP 5211 // >>> unpickled_df # doctest: +SKIP 5212 // 5213 // foo bar 5214 // 5215 // 0 0 5 5216 // 1 1 6 5217 // 2 2 7 5218 // 3 3 8 5219 // 4 4 9 5220 // 5221 //go:linkname ToPickle py.to_pickle 5222 func ToPickle(obj *py.Object, filepathOrBuffer *py.Object, compression *py.Object, protocol *py.Object, storageOptions *py.Object) *py.Object 5223 5224 // Read from the store, close it if we opened it. 
5225 // 5226 // Retrieve pandas object stored in file, optionally based on where 5227 // criteria. 5228 // 5229 // .. warning:: 5230 // 5231 // Pandas uses PyTables for reading and writing HDF5 files, which allows 5232 // serializing object-dtype data with pickle when using the "fixed" format. 5233 // Loading pickled data received from untrusted sources can be unsafe. 5234 // 5235 // See: https://docs.python.org/3/library/pickle.html for more. 5236 // 5237 // Parameters 5238 // ---------- 5239 // path_or_buf : str, path object, pandas.HDFStore 5240 // 5241 // Any valid string path is acceptable. Only supports the local file system, 5242 // remote URLs and file-like objects are not supported. 5243 // 5244 // If you want to pass in a path object, pandas accepts any 5245 // ``os.PathLike``. 5246 // 5247 // Alternatively, pandas accepts an open :class:`pandas.HDFStore` object. 5248 // 5249 // key : object, optional 5250 // 5251 // The group identifier in the store. Can be omitted if the HDF file 5252 // contains a single pandas object. 5253 // 5254 // mode : {'r', 'r+', 'a'}, default 'r' 5255 // 5256 // Mode to use when opening the file. Ignored if path_or_buf is a 5257 // :class:`pandas.HDFStore`. Default is 'r'. 5258 // 5259 // errors : str, default 'strict' 5260 // 5261 // Specifies how encoding and decoding errors are to be handled. 5262 // See the errors argument for :func:`open` for a full list 5263 // of options. 5264 // 5265 // where : list, optional 5266 // 5267 // A list of Term (or convertible) objects. 5268 // 5269 // start : int, optional 5270 // 5271 // Row number to start selection. 5272 // 5273 // stop : int, optional 5274 // 5275 // Row number to stop selection. 5276 // 5277 // columns : list, optional 5278 // 5279 // A list of columns names to return. 5280 // 5281 // iterator : bool, optional 5282 // 5283 // Return an iterator object. 5284 // 5285 // chunksize : int, optional 5286 // 5287 // Number of rows to include in an iteration when using an iterator. 5288 // 5289 // **kwargs 5290 // 5291 // Additional keyword arguments passed to HDFStore. 5292 // 5293 // Returns 5294 // ------- 5295 // object 5296 // 5297 // The selected object. Return type depends on the object stored. 5298 // 5299 // See Also 5300 // -------- 5301 // DataFrame.to_hdf : Write a HDF file from a DataFrame. 5302 // HDFStore : Low-level access to HDF files. 5303 // 5304 // Examples 5305 // -------- 5306 // >>> df = pd.DataFrame([[1, 1.0, 'a']], columns=['x', 'y', 'z']) # doctest: +SKIP 5307 // >>> df.to_hdf('./store.h5', 'data') # doctest: +SKIP 5308 // >>> reread = pd.read_hdf('./store.h5') # doctest: +SKIP 5309 // 5310 //go:linkname ReadHdf py.read_hdf 5311 func ReadHdf(pathOrBuf *py.Object, key *py.Object, mode *py.Object, errors *py.Object, where *py.Object, start *py.Object, stop *py.Object, columns *py.Object, iterator *py.Object, chunksize *py.Object) *py.Object 5312 5313 // Read SQL query or database table into a DataFrame. 5314 // 5315 // This function is a convenience wrapper around “read_sql_table“ and 5316 // “read_sql_query“ (for backward compatibility). It will delegate 5317 // to the specific function depending on the provided input. A SQL query 5318 // will be routed to “read_sql_query“, while a database table name will 5319 // be routed to “read_sql_table“. Note that the delegated function might 5320 // have more specific notes about their functionality not listed here. 
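// As a rough illustration of the delegation described above (the in-memory SQLite
// connection, the table name ``my_table`` and the SQLAlchemy URL are assumptions made
// only for this sketch), a query string and a table name are routed differently:
//
// >>> from sqlite3 import connect  # doctest: +SKIP
// >>> conn = connect(":memory:")  # doctest: +SKIP
// >>> pd.read_sql("SELECT 1 AS x", conn)  # handled by read_sql_query  # doctest: +SKIP
// >>> pd.read_sql("my_table", "sqlite:///my.db")  # handled by read_sql_table  # doctest: +SKIP
//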
5321 // 5322 // Parameters 5323 // ---------- 5324 // sql : str or SQLAlchemy Selectable (select or text object) 5325 // 5326 // SQL query to be executed or a table name. 5327 // 5328 // con : ADBC Connection, SQLAlchemy connectable, str, or sqlite3 connection 5329 // 5330 // ADBC provides high performance I/O with native type support, where available. 5331 // Using SQLAlchemy makes it possible to use any DB supported by that 5332 // library. If a DBAPI2 object, only sqlite3 is supported. The user is responsible 5333 // for engine disposal and connection closure for the ADBC connection and 5334 // SQLAlchemy connectable; str connections are closed automatically. See 5335 // `here <https://docs.sqlalchemy.org/en/20/core/connections.html>`_. 5336 // 5337 // index_col : str or list of str, optional, default: None 5338 // 5339 // Column(s) to set as index(MultiIndex). 5340 // 5341 // coerce_float : bool, default True 5342 // 5343 // Attempts to convert values of non-string, non-numeric objects (like 5344 // decimal.Decimal) to floating point, useful for SQL result sets. 5345 // 5346 // params : list, tuple or dict, optional, default: None 5347 // 5348 // List of parameters to pass to execute method. The syntax used 5349 // to pass parameters is database driver dependent. Check your 5350 // database driver documentation for which of the five syntax styles, 5351 // described in PEP 249's paramstyle, is supported. 5352 // Eg. for psycopg2, uses %(name)s so use params={'name' : 'value'}. 5353 // 5354 // parse_dates : list or dict, default: None 5355 // - List of column names to parse as dates. 5356 // - Dict of “{column_name: format string}“ where format string is 5357 // strftime compatible in case of parsing string times, or is one of 5358 // (D, s, ns, ms, us) in case of parsing integer timestamps. 5359 // - Dict of “{column_name: arg dict}“, where the arg dict corresponds 5360 // to the keyword arguments of :func:`pandas.to_datetime` 5361 // Especially useful with databases without native Datetime support, 5362 // such as SQLite. 5363 // 5364 // columns : list, default: None 5365 // 5366 // List of column names to select from SQL table (only used when reading 5367 // a table). 5368 // 5369 // chunksize : int, default None 5370 // 5371 // If specified, return an iterator where `chunksize` is the 5372 // number of rows to include in each chunk. 5373 // 5374 // dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable' 5375 // 5376 // Back-end data type applied to the resultant :class:`DataFrame` 5377 // (still experimental). Behaviour is as follows: 5378 // 5379 // * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` 5380 // (default). 5381 // * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` 5382 // DataFrame. 5383 // 5384 // .. versionadded:: 2.0 5385 // 5386 // dtype : Type name or dict of columns 5387 // 5388 // Data type for data or columns. E.g. np.float64 or 5389 // {'a': np.float64, 'b': np.int32, 'c': 'Int64'}. 5390 // The argument is ignored if a table is passed instead of a query. 5391 // 5392 // .. versionadded:: 2.0.0 5393 // 5394 // Returns 5395 // ------- 5396 // DataFrame or Iterator[DataFrame] 5397 // 5398 // See Also 5399 // -------- 5400 // read_sql_table : Read SQL database table into a DataFrame. 5401 // read_sql_query : Read SQL query into a DataFrame. 5402 // 5403 // Examples 5404 // -------- 5405 // Read data from SQL via either a SQL query or a SQL tablename. 
5406 // When using a SQLite database only SQL queries are accepted, 5407 // providing only the SQL tablename will result in an error. 5408 // 5409 // >>> from sqlite3 import connect 5410 // >>> conn = connect(':memory:') 5411 // >>> df = pd.DataFrame(data=[[0, '10/11/12'], [1, '12/11/10']], 5412 // ... columns=['int_column', 'date_column']) 5413 // >>> df.to_sql(name='test_data', con=conn) 5414 // 2 5415 // 5416 // >>> pd.read_sql('SELECT int_column, date_column FROM test_data', conn) 5417 // 5418 // int_column date_column 5419 // 5420 // 0 0 10/11/12 5421 // 1 1 12/11/10 5422 // 5423 // >>> pd.read_sql('test_data', 'postgres:///db_name') # doctest:+SKIP 5424 // 5425 // Apply date parsing to columns through the “parse_dates“ argument 5426 // The “parse_dates“ argument calls “pd.to_datetime“ on the provided columns. 5427 // Custom argument values for applying “pd.to_datetime“ on a column are specified 5428 // via a dictionary format: 5429 // 5430 // >>> pd.read_sql('SELECT int_column, date_column FROM test_data', 5431 // ... conn, 5432 // ... parse_dates={"date_column": {"format": "%d/%m/%y"}}) 5433 // 5434 // int_column date_column 5435 // 5436 // 0 0 2012-11-10 5437 // 1 1 2010-11-12 5438 // 5439 // .. versionadded:: 2.2.0 5440 // 5441 // pandas now supports reading via ADBC drivers 5442 // 5443 // >>> from adbc_driver_postgresql import dbapi # doctest:+SKIP 5444 // >>> with dbapi.connect('postgres:///db_name') as conn: # doctest:+SKIP 5445 // ... pd.read_sql('SELECT int_column FROM test_data', conn) 5446 // 5447 // int_column 5448 // 5449 // 0 0 5450 // 1 1 5451 // 5452 //go:linkname ReadSql py.read_sql 5453 func ReadSql(sql *py.Object, con *py.Object, indexCol *py.Object, coerceFloat *py.Object, params *py.Object, parseDates *py.Object, columns *py.Object, chunksize *py.Object, dtypeBackend *py.Object, dtype *py.Object) *py.Object 5454 5455 // Read SQL query into a DataFrame. 5456 // 5457 // Returns a DataFrame corresponding to the result set of the query 5458 // string. Optionally provide an `index_col` parameter to use one of the 5459 // columns as the index, otherwise default integer index will be used. 5460 // 5461 // Parameters 5462 // ---------- 5463 // sql : str SQL query or SQLAlchemy Selectable (select or text object) 5464 // 5465 // SQL query to be executed. 5466 // 5467 // con : SQLAlchemy connectable, str, or sqlite3 connection 5468 // 5469 // Using SQLAlchemy makes it possible to use any DB supported by that 5470 // library. If a DBAPI2 object, only sqlite3 is supported. 5471 // 5472 // index_col : str or list of str, optional, default: None 5473 // 5474 // Column(s) to set as index(MultiIndex). 5475 // 5476 // coerce_float : bool, default True 5477 // 5478 // Attempts to convert values of non-string, non-numeric objects (like 5479 // decimal.Decimal) to floating point. Useful for SQL result sets. 5480 // 5481 // params : list, tuple or mapping, optional, default: None 5482 // 5483 // List of parameters to pass to execute method. The syntax used 5484 // to pass parameters is database driver dependent. Check your 5485 // database driver documentation for which of the five syntax styles, 5486 // described in PEP 249's paramstyle, is supported. 5487 // Eg. for psycopg2, uses %(name)s so use params={'name' : 'value'}. 5488 // 5489 // parse_dates : list or dict, default: None 5490 // - List of column names to parse as dates. 
5491 // - Dict of “{column_name: format string}“ where format string is 5492 // strftime compatible in case of parsing string times, or is one of 5493 // (D, s, ns, ms, us) in case of parsing integer timestamps. 5494 // - Dict of “{column_name: arg dict}“, where the arg dict corresponds 5495 // to the keyword arguments of :func:`pandas.to_datetime` 5496 // Especially useful with databases without native Datetime support, 5497 // such as SQLite. 5498 // 5499 // chunksize : int, default None 5500 // 5501 // If specified, return an iterator where `chunksize` is the number of 5502 // rows to include in each chunk. 5503 // 5504 // dtype : Type name or dict of columns 5505 // 5506 // Data type for data or columns. E.g. np.float64 or 5507 // {'a': np.float64, 'b': np.int32, 'c': 'Int64'}. 5508 // 5509 // .. versionadded:: 1.3.0 5510 // 5511 // dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable' 5512 // 5513 // Back-end data type applied to the resultant :class:`DataFrame` 5514 // (still experimental). Behaviour is as follows: 5515 // 5516 // * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` 5517 // (default). 5518 // * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` 5519 // DataFrame. 5520 // 5521 // .. versionadded:: 2.0 5522 // 5523 // Returns 5524 // ------- 5525 // DataFrame or Iterator[DataFrame] 5526 // 5527 // See Also 5528 // -------- 5529 // read_sql_table : Read SQL database table into a DataFrame. 5530 // read_sql : Read SQL query or database table into a DataFrame. 5531 // 5532 // Notes 5533 // ----- 5534 // Any datetime values with time zone information parsed via the `parse_dates` 5535 // parameter will be converted to UTC. 5536 // 5537 // Examples 5538 // -------- 5539 // >>> from sqlalchemy import create_engine # doctest: +SKIP 5540 // >>> engine = create_engine("sqlite:///database.db") # doctest: +SKIP 5541 // >>> with engine.connect() as conn, conn.begin(): # doctest: +SKIP 5542 // ... data = pd.read_sql_table("data", conn) # doctest: +SKIP 5543 // 5544 //go:linkname ReadSqlQuery py.read_sql_query 5545 func ReadSqlQuery(sql *py.Object, con *py.Object, indexCol *py.Object, coerceFloat *py.Object, params *py.Object, parseDates *py.Object, chunksize *py.Object, dtype *py.Object, dtypeBackend *py.Object) *py.Object 5546 5547 // Read SQL database table into a DataFrame. 5548 // 5549 // Given a table name and a SQLAlchemy connectable, returns a DataFrame. 5550 // This function does not support DBAPI connections. 5551 // 5552 // Parameters 5553 // ---------- 5554 // table_name : str 5555 // 5556 // Name of SQL table in database. 5557 // 5558 // con : SQLAlchemy connectable or str 5559 // 5560 // A database URI could be provided as str. 5561 // SQLite DBAPI connection mode not supported. 5562 // 5563 // schema : str, default None 5564 // 5565 // Name of SQL schema in database to query (if database flavor 5566 // supports this). Uses default schema if None (default). 5567 // 5568 // index_col : str or list of str, optional, default: None 5569 // 5570 // Column(s) to set as index(MultiIndex). 5571 // 5572 // coerce_float : bool, default True 5573 // 5574 // Attempts to convert values of non-string, non-numeric objects (like 5575 // decimal.Decimal) to floating point. Can result in loss of Precision. 5576 // 5577 // parse_dates : list or dict, default None 5578 // - List of column names to parse as dates. 
5579 // - Dict of “{column_name: format string}“ where format string is 5580 // strftime compatible in case of parsing string times or is one of 5581 // (D, s, ns, ms, us) in case of parsing integer timestamps. 5582 // - Dict of “{column_name: arg dict}“, where the arg dict corresponds 5583 // to the keyword arguments of :func:`pandas.to_datetime` 5584 // Especially useful with databases without native Datetime support, 5585 // such as SQLite. 5586 // 5587 // columns : list, default None 5588 // 5589 // List of column names to select from SQL table. 5590 // 5591 // chunksize : int, default None 5592 // 5593 // If specified, returns an iterator where `chunksize` is the number of 5594 // rows to include in each chunk. 5595 // 5596 // dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable' 5597 // 5598 // Back-end data type applied to the resultant :class:`DataFrame` 5599 // (still experimental). Behaviour is as follows: 5600 // 5601 // * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` 5602 // (default). 5603 // * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` 5604 // DataFrame. 5605 // 5606 // .. versionadded:: 2.0 5607 // 5608 // Returns 5609 // ------- 5610 // DataFrame or Iterator[DataFrame] 5611 // 5612 // A SQL table is returned as two-dimensional data structure with labeled 5613 // axes. 5614 // 5615 // See Also 5616 // -------- 5617 // read_sql_query : Read SQL query into a DataFrame. 5618 // read_sql : Read SQL query or database table into a DataFrame. 5619 // 5620 // Notes 5621 // ----- 5622 // Any datetime values with time zone information will be converted to UTC. 5623 // 5624 // Examples 5625 // -------- 5626 // >>> pd.read_sql_table('table_name', 'postgres:///db_name') # doctest:+SKIP 5627 // 5628 //go:linkname ReadSqlTable py.read_sql_table 5629 func ReadSqlTable(tableName *py.Object, con *py.Object, schema *py.Object, indexCol *py.Object, coerceFloat *py.Object, parseDates *py.Object, columns *py.Object, chunksize *py.Object, dtypeBackend *py.Object) *py.Object 5630 5631 // Read text from clipboard and pass to :func:`~pandas.read_csv`. 5632 // 5633 // Parses clipboard contents similar to how CSV files are parsed 5634 // using :func:`~pandas.read_csv`. 5635 // 5636 // Parameters 5637 // ---------- 5638 // sep : str, default '\\s+' 5639 // 5640 // A string or regex delimiter. The default of ``'\\s+'`` denotes 5641 // one or more whitespace characters. 5642 // 5643 // dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable' 5644 // 5645 // Back-end data type applied to the resultant :class:`DataFrame` 5646 // (still experimental). Behaviour is as follows: 5647 // 5648 // * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` 5649 // (default). 5650 // * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` 5651 // DataFrame. 5652 // 5653 // .. versionadded:: 2.0 5654 // 5655 // **kwargs 5656 // 5657 // See :func:`~pandas.read_csv` for the full argument list. 5658 // 5659 // Returns 5660 // ------- 5661 // DataFrame 5662 // 5663 // A parsed :class:`~pandas.DataFrame` object. 5664 // 5665 // See Also 5666 // -------- 5667 // DataFrame.to_clipboard : Copy object to the system clipboard. 5668 // read_csv : Read a comma-separated values (csv) file into DataFrame. 5669 // read_fwf : Read a table of fixed-width formatted lines into DataFrame. 
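// Because the extra keyword arguments are forwarded to ``read_csv``, options such as
// ``header`` or ``names`` can be passed straight through. A small sketch, assuming the
// clipboard currently holds comma-separated text without a header row:
//
// >>> pd.read_clipboard(sep=",", header=None, names=["a", "b", "c"])  # doctest: +SKIP
//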
5670 // 5671 // Examples 5672 // -------- 5673 // >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['A', 'B', 'C']) 5674 // >>> df.to_clipboard() # doctest: +SKIP 5675 // >>> pd.read_clipboard() # doctest: +SKIP 5676 // 5677 // A B C 5678 // 5679 // 0 1 2 3 5680 // 1 4 5 6 5681 // 5682 //go:linkname ReadClipboard py.read_clipboard 5683 func ReadClipboard(sep *py.Object, dtypeBackend *py.Object) *py.Object 5684 5685 // Load a parquet object from the file path, returning a DataFrame. 5686 // 5687 // Parameters 5688 // ---------- 5689 // path : str, path object or file-like object 5690 // 5691 // String, path object (implementing ``os.PathLike[str]``), or file-like 5692 // object implementing a binary ``read()`` function. 5693 // The string could be a URL. Valid URL schemes include http, ftp, s3, 5694 // gs, and file. For file URLs, a host is expected. A local file could be: 5695 // ``file://localhost/path/to/table.parquet``. 5696 // A file URL can also be a path to a directory that contains multiple 5697 // partitioned parquet files. Both pyarrow and fastparquet support 5698 // paths to directories as well as file URLs. A directory path could be: 5699 // ``file://localhost/path/to/tables`` or ``s3://bucket/partition_dir``. 5700 // 5701 // engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto' 5702 // 5703 // Parquet library to use. If 'auto', then the option 5704 // ``io.parquet.engine`` is used. The default ``io.parquet.engine`` 5705 // behavior is to try 'pyarrow', falling back to 'fastparquet' if 5706 // 'pyarrow' is unavailable. 5707 // 5708 // When using the ``'pyarrow'`` engine and no storage options are provided 5709 // and a filesystem is implemented by both ``pyarrow.fs`` and ``fsspec`` 5710 // (e.g. "s3://"), then the ``pyarrow.fs`` filesystem is attempted first. 5711 // Use the filesystem keyword with an instantiated fsspec filesystem 5712 // if you wish to use its implementation. 5713 // 5714 // columns : list, default=None 5715 // 5716 // If not None, only these columns will be read from the file. 5717 // 5718 // storage_options : dict, optional 5719 // 5720 // Extra options that make sense for a particular storage connection, e.g. 5721 // host, port, username, password, etc. For HTTP(S) URLs the key-value pairs 5722 // are forwarded to ``urllib.request.Request`` as header options. For other 5723 // URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are 5724 // forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more 5725 // details, and for more examples on storage options refer `here 5726 // <https://pandas.pydata.org/docs/user_guide/io.html? 5727 // highlight=storage_options#reading-writing-remote-files>`_. 5728 // 5729 // .. versionadded:: 1.3.0 5730 // 5731 // use_nullable_dtypes : bool, default False 5732 // 5733 // If True, use dtypes that use ``pd.NA`` as missing value indicator 5734 // for the resulting DataFrame. (only applicable for the ``pyarrow`` 5735 // engine) 5736 // As new dtypes are added that support ``pd.NA`` in the future, the 5737 // output with this option will change to use those dtypes. 5738 // Note: this is an experimental option, and behaviour (e.g. additional 5739 // support dtypes) may change without notice. 5740 // 5741 // .. deprecated:: 2.0 5742 // 5743 // dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable' 5744 // 5745 // Back-end data type applied to the resultant :class:`DataFrame` 5746 // (still experimental). 
Behaviour is as follows: 5747 // 5748 // * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` 5749 // (default). 5750 // * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` 5751 // DataFrame. 5752 // 5753 // .. versionadded:: 2.0 5754 // 5755 // filesystem : fsspec or pyarrow filesystem, default None 5756 // 5757 // Filesystem object to use when reading the parquet file. Only implemented 5758 // for ``engine="pyarrow"``. 5759 // 5760 // .. versionadded:: 2.1.0 5761 // 5762 // filters : List[Tuple] or List[List[Tuple]], default None 5763 // 5764 // To filter out data. 5765 // Filter syntax: [[(column, op, val), ...],...] 5766 // where op is [==, =, >, >=, <, <=, !=, in, not in] 5767 // The innermost tuples are transposed into a set of filters applied 5768 // through an `AND` operation. 5769 // The outer list combines these sets of filters through an `OR` 5770 // operation. 5771 // A single list of tuples can also be used, meaning that no `OR` 5772 // operation between set of filters is to be conducted. 5773 // 5774 // Using this argument will NOT result in row-wise filtering of the final 5775 // partitions unless ``engine="pyarrow"`` is also specified. For 5776 // other engines, filtering is only performed at the partition level, that is, 5777 // to prevent the loading of some row-groups and/or files. 5778 // 5779 // .. versionadded:: 2.1.0 5780 // 5781 // **kwargs 5782 // 5783 // Any additional kwargs are passed to the engine. 5784 // 5785 // Returns 5786 // ------- 5787 // DataFrame 5788 // 5789 // See Also 5790 // -------- 5791 // DataFrame.to_parquet : Create a parquet object that serializes a DataFrame. 5792 // 5793 // Examples 5794 // -------- 5795 // >>> original_df = pd.DataFrame( 5796 // ... {"foo": range(5), "bar": range(5, 10)} 5797 // ... ) 5798 // >>> original_df 5799 // 5800 // foo bar 5801 // 5802 // 0 0 5 5803 // 1 1 6 5804 // 2 2 7 5805 // 3 3 8 5806 // 4 4 9 5807 // >>> df_parquet_bytes = original_df.to_parquet() 5808 // >>> from io import BytesIO 5809 // >>> restored_df = pd.read_parquet(BytesIO(df_parquet_bytes)) 5810 // >>> restored_df 5811 // 5812 // foo bar 5813 // 5814 // 0 0 5 5815 // 1 1 6 5816 // 2 2 7 5817 // 3 3 8 5818 // 4 4 9 5819 // >>> restored_df.equals(original_df) 5820 // True 5821 // >>> restored_bar = pd.read_parquet(BytesIO(df_parquet_bytes), columns=["bar"]) 5822 // >>> restored_bar 5823 // 5824 // bar 5825 // 5826 // 0 5 5827 // 1 6 5828 // 2 7 5829 // 3 8 5830 // 4 9 5831 // >>> restored_bar.equals(original_df[['bar']]) 5832 // True 5833 // 5834 // The function uses `kwargs` that are passed directly to the engine. 5835 // In the following example, we use the `filters` argument of the pyarrow 5836 // engine to filter the rows of the DataFrame. 5837 // 5838 // Since `pyarrow` is the default engine, we can omit the `engine` argument. 5839 // Note that the `filters` argument is implemented by the `pyarrow` engine, 5840 // which can benefit from multithreading and also potentially be more 5841 // economical in terms of memory. 
5842 // 5843 // >>> sel = [("foo", ">", 2)] 5844 // >>> restored_part = pd.read_parquet(BytesIO(df_parquet_bytes), filters=sel) 5845 // >>> restored_part 5846 // 5847 // foo bar 5848 // 5849 // 0 3 8 5850 // 1 4 9 5851 // 5852 //go:linkname ReadParquet py.read_parquet 5853 func ReadParquet(path *py.Object, engine *py.Object, columns *py.Object, storageOptions *py.Object, useNullableDtypes *py.Object, dtypeBackend *py.Object, filesystem *py.Object, filters *py.Object) *py.Object 5854 5855 // Load an ORC object from the file path, returning a DataFrame. 5856 // 5857 // Parameters 5858 // ---------- 5859 // path : str, path object, or file-like object 5860 // 5861 // String, path object (implementing ``os.PathLike[str]``), or file-like 5862 // object implementing a binary ``read()`` function. The string could be a URL. 5863 // Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is 5864 // expected. A local file could be: 5865 // ``file://localhost/path/to/table.orc``. 5866 // 5867 // columns : list, default None 5868 // 5869 // If not None, only these columns will be read from the file. 5870 // Output always follows the ordering of the file and not the columns list. 5871 // This mirrors the original behaviour of 5872 // :external+pyarrow:py:meth:`pyarrow.orc.ORCFile.read`. 5873 // 5874 // dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable' 5875 // 5876 // Back-end data type applied to the resultant :class:`DataFrame` 5877 // (still experimental). Behaviour is as follows: 5878 // 5879 // * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` 5880 // (default). 5881 // * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` 5882 // DataFrame. 5883 // 5884 // .. versionadded:: 2.0 5885 // 5886 // filesystem : fsspec or pyarrow filesystem, default None 5887 // 5888 // Filesystem object to use when reading the parquet file. 5889 // 5890 // .. versionadded:: 2.1.0 5891 // 5892 // **kwargs 5893 // 5894 // Any additional kwargs are passed to pyarrow. 5895 // 5896 // Returns 5897 // ------- 5898 // DataFrame 5899 // 5900 // Notes 5901 // ----- 5902 // Before using this function you should read the :ref:`user guide about ORC <io.orc>` 5903 // and :ref:`install optional dependencies <install.warn_orc>`. 5904 // 5905 // If “path“ is a URI scheme pointing to a local or remote file (e.g. "s3://"), 5906 // a “pyarrow.fs“ filesystem will be attempted to read the file. You can also pass a 5907 // pyarrow or fsspec filesystem object into the filesystem keyword to override this 5908 // behavior. 5909 // 5910 // Examples 5911 // -------- 5912 // >>> result = pd.read_orc("example_pa.orc") # doctest: +SKIP 5913 // 5914 //go:linkname ReadOrc py.read_orc 5915 func ReadOrc(path *py.Object, columns *py.Object, dtypeBackend *py.Object, filesystem *py.Object) *py.Object 5916 5917 // Load a feather-format object from the file path. 5918 // 5919 // Parameters 5920 // ---------- 5921 // path : str, path object, or file-like object 5922 // 5923 // String, path object (implementing ``os.PathLike[str]``), or file-like 5924 // object implementing a binary ``read()`` function. The string could be a URL. 5925 // Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is 5926 // expected. A local file could be: ``file://localhost/path/to/table.feather``. 5927 // 5928 // columns : sequence, default None 5929 // 5930 // If not provided, all columns are read. 
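// For instance (a sketch only; the file name and column labels are assumed), a subset
// of columns can be requested up front instead of filtering after the load:
//
// >>> pd.read_feather("table.feather", columns=["a", "b"])  # doctest: +SKIP
//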
5931 // 5932 // use_threads : bool, default True 5933 // 5934 // Whether to parallelize reading using multiple threads. 5935 // 5936 // storage_options : dict, optional 5937 // 5938 // Extra options that make sense for a particular storage connection, e.g. 5939 // host, port, username, password, etc. For HTTP(S) URLs the key-value pairs 5940 // are forwarded to ``urllib.request.Request`` as header options. For other 5941 // URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are 5942 // forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more 5943 // details, and for more examples on storage options refer `here 5944 // <https://pandas.pydata.org/docs/user_guide/io.html? 5945 // highlight=storage_options#reading-writing-remote-files>`_. 5946 // 5947 // dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable' 5948 // 5949 // Back-end data type applied to the resultant :class:`DataFrame` 5950 // (still experimental). Behaviour is as follows: 5951 // 5952 // * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` 5953 // (default). 5954 // * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` 5955 // DataFrame. 5956 // 5957 // .. versionadded:: 2.0 5958 // 5959 // Returns 5960 // ------- 5961 // type of object stored in file 5962 // 5963 // Examples 5964 // -------- 5965 // >>> df = pd.read_feather("path/to/file.feather") # doctest: +SKIP 5966 // 5967 //go:linkname ReadFeather py.read_feather 5968 func ReadFeather(path *py.Object, columns *py.Object, useThreads *py.Object, storageOptions *py.Object, dtypeBackend *py.Object) *py.Object 5969 5970 // Load data from Google BigQuery. 5971 // 5972 // .. deprecated:: 2.2.0 5973 // 5974 // Please use ``pandas_gbq.read_gbq`` instead. 5975 // 5976 // This function requires the `pandas-gbq package 5977 // <https://pandas-gbq.readthedocs.io>`__. 5978 // 5979 // See the `How to authenticate with Google BigQuery 5980 // <https://pandas-gbq.readthedocs.io/en/latest/howto/authentication.html>`__ 5981 // guide for authentication instructions. 5982 // 5983 // Parameters 5984 // ---------- 5985 // query : str 5986 // 5987 // SQL-Like Query to return data values. 5988 // 5989 // project_id : str, optional 5990 // 5991 // Google BigQuery Account project ID. Optional when available from 5992 // the environment. 5993 // 5994 // index_col : str, optional 5995 // 5996 // Name of result column to use for index in results DataFrame. 5997 // 5998 // col_order : list(str), optional 5999 // 6000 // List of BigQuery column names in the desired order for results 6001 // DataFrame. 6002 // 6003 // reauth : bool, default False 6004 // 6005 // Force Google BigQuery to re-authenticate the user. This is useful 6006 // if multiple accounts are used. 6007 // 6008 // auth_local_webserver : bool, default True 6009 // 6010 // Use the `local webserver flow`_ instead of the `console flow`_ 6011 // when getting user credentials. 6012 // 6013 // .. _local webserver flow: 6014 // https://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server 6015 // .. _console flow: 6016 // https://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console 6017 // 6018 // *New in version 0.2.0 of pandas-gbq*. 6019 // 6020 // .. versionchanged:: 1.5.0 6021 // Default value is changed to ``True``. 
Google has deprecated the 6022 // ``auth_local_webserver = False`` `"out of band" (copy-paste) 6023 // flow 6024 // <https://developers.googleblog.com/2022/02/making-oauth-flows-safer.html?m=1#disallowed-oob>`_. 6025 // 6026 // dialect : str, default 'legacy' 6027 // 6028 // Note: The default value is changing to 'standard' in a future version. 6029 // 6030 // SQL syntax dialect to use. Value can be one of: 6031 // 6032 // ``'legacy'`` 6033 // Use BigQuery's legacy SQL dialect. For more information see 6034 // `BigQuery Legacy SQL Reference 6035 // <https://cloud.google.com/bigquery/docs/reference/legacy-sql>`__. 6036 // ``'standard'`` 6037 // Use BigQuery's standard SQL, which is 6038 // compliant with the SQL 2011 standard. For more information 6039 // see `BigQuery Standard SQL Reference 6040 // <https://cloud.google.com/bigquery/docs/reference/standard-sql/>`__. 6041 // 6042 // location : str, optional 6043 // 6044 // Location where the query job should run. See the `BigQuery locations 6045 // documentation 6046 // <https://cloud.google.com/bigquery/docs/dataset-locations>`__ for a 6047 // list of available locations. The location must match that of any 6048 // datasets used in the query. 6049 // 6050 // *New in version 0.5.0 of pandas-gbq*. 6051 // 6052 // configuration : dict, optional 6053 // 6054 // Query config parameters for job processing. 6055 // For example: 6056 // 6057 // configuration = {'query': {'useQueryCache': False}} 6058 // 6059 // For more information see `BigQuery REST API Reference 6060 // <https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query>`__. 6061 // 6062 // credentials : google.auth.credentials.Credentials, optional 6063 // 6064 // Credentials for accessing Google APIs. Use this parameter to override 6065 // default credentials, such as to use Compute Engine 6066 // :class:`google.auth.compute_engine.Credentials` or Service Account 6067 // :class:`google.oauth2.service_account.Credentials` directly. 6068 // 6069 // *New in version 0.8.0 of pandas-gbq*. 6070 // 6071 // use_bqstorage_api : bool, default False 6072 // 6073 // Use the `BigQuery Storage API 6074 // <https://cloud.google.com/bigquery/docs/reference/storage/>`__ to 6075 // download query results quickly, but at an increased cost. To use this 6076 // API, first `enable it in the Cloud Console 6077 // <https://console.cloud.google.com/apis/library/bigquerystorage.googleapis.com>`__. 6078 // You must also have the `bigquery.readsessions.create 6079 // <https://cloud.google.com/bigquery/docs/access-control#roles>`__ 6080 // permission on the project you are billing queries to. 6081 // 6082 // This feature requires version 0.10.0 or later of the ``pandas-gbq`` 6083 // package. It also requires the ``google-cloud-bigquery-storage`` and 6084 // ``fastavro`` packages. 6085 // 6086 // max_results : int, optional 6087 // 6088 // If set, limit the maximum number of rows to fetch from the query 6089 // results. 6090 // 6091 // progress_bar_type : Optional, str 6092 // 6093 // If set, use the `tqdm <https://tqdm.github.io/>`__ library to 6094 // display a progress bar while the data downloads. Install the 6095 // ``tqdm`` package to use this feature. 6096 // 6097 // Possible values of ``progress_bar_type`` include: 6098 // 6099 // ``None`` 6100 // No progress bar. 6101 // ``'tqdm'`` 6102 // Use the :func:`tqdm.tqdm` function to print a progress bar 6103 // to :data:`sys.stderr`. 
6104 // ``'tqdm_notebook'`` 6105 // Use the :func:`tqdm.tqdm_notebook` function to display a 6106 // progress bar as a Jupyter notebook widget. 6107 // ``'tqdm_gui'`` 6108 // Use the :func:`tqdm.tqdm_gui` function to display a 6109 // progress bar as a graphical dialog box. 6110 // 6111 // Returns 6112 // ------- 6113 // df: DataFrame 6114 // 6115 // DataFrame representing results of query. 6116 // 6117 // See Also 6118 // -------- 6119 // pandas_gbq.read_gbq : This function in the pandas-gbq library. 6120 // DataFrame.to_gbq : Write a DataFrame to Google BigQuery. 6121 // 6122 // Examples 6123 // -------- 6124 // Example taken from `Google BigQuery documentation 6125 // <https://cloud.google.com/bigquery/docs/pandas-gbq-migration>`_ 6126 // 6127 // >>> sql = "SELECT name FROM table_name WHERE state = 'TX' LIMIT 100;" 6128 // >>> df = pd.read_gbq(sql, dialect="standard") # doctest: +SKIP 6129 // >>> project_id = "your-project-id" # doctest: +SKIP 6130 // >>> df = pd.read_gbq(sql, 6131 // ... project_id=project_id, 6132 // ... dialect="standard" 6133 // ... ) # doctest: +SKIP 6134 // 6135 //go:linkname ReadGbq py.read_gbq 6136 func ReadGbq(query *py.Object, projectId *py.Object, indexCol *py.Object, colOrder *py.Object, reauth *py.Object, authLocalWebserver *py.Object, dialect *py.Object, location *py.Object, configuration *py.Object, credentials *py.Object, useBqstorageApi *py.Object, maxResults *py.Object, progressBarType *py.Object) *py.Object 6137 6138 // Read HTML tables into a “list“ of “DataFrame“ objects. 6139 // 6140 // Parameters 6141 // ---------- 6142 // io : str, path object, or file-like object 6143 // 6144 // String, path object (implementing ``os.PathLike[str]``), or file-like 6145 // object implementing a string ``read()`` function. 6146 // The string can represent a URL or the HTML itself. Note that 6147 // lxml only accepts the http, ftp and file url protocols. If you have a 6148 // URL that starts with ``'https'`` you might try removing the ``'s'``. 6149 // 6150 // .. deprecated:: 2.1.0 6151 // Passing html literal strings is deprecated. 6152 // Wrap literal string/bytes input in ``io.StringIO``/``io.BytesIO`` instead. 6153 // 6154 // match : str or compiled regular expression, optional 6155 // 6156 // The set of tables containing text matching this regex or string will be 6157 // returned. Unless the HTML is extremely simple you will probably need to 6158 // pass a non-empty string here. Defaults to '.+' (match any non-empty 6159 // string). The default value will return all tables contained on a page. 6160 // This value is converted to a regular expression so that there is 6161 // consistent behavior between Beautiful Soup and lxml. 6162 // 6163 // flavor : {"lxml", "html5lib", "bs4"} or list-like, optional 6164 // 6165 // The parsing engine (or list of parsing engines) to use. 'bs4' and 6166 // 'html5lib' are synonymous with each other, they are both there for 6167 // backwards compatibility. The default of ``None`` tries to use ``lxml`` 6168 // to parse and if that fails it falls back on ``bs4`` + ``html5lib``. 6169 // 6170 // header : int or list-like, optional 6171 // 6172 // The row (or list of rows for a :class:`~pandas.MultiIndex`) to use to 6173 // make the columns headers. 6174 // 6175 // index_col : int or list-like, optional 6176 // 6177 // The column (or list of columns) to use to create the index. 6178 // 6179 // skiprows : int, list-like or slice, optional 6180 // 6181 // Number of rows to skip after parsing the column integer. 0-based. 
If a 6182 // sequence of integers or a slice is given, will skip the rows indexed by 6183 // that sequence. Note that a single element sequence means 'skip the nth 6184 // row' whereas an integer means 'skip n rows'. 6185 // 6186 // attrs : dict, optional 6187 // 6188 // This is a dictionary of attributes that you can pass to use to identify 6189 // the table in the HTML. These are not checked for validity before being 6190 // passed to lxml or Beautiful Soup. However, these attributes must be 6191 // valid HTML table attributes to work correctly. For example, :: 6192 // 6193 // attrs = {'id': 'table'} 6194 // 6195 // is a valid attribute dictionary because the 'id' HTML tag attribute is 6196 // a valid HTML attribute for *any* HTML tag as per `this document 6197 // <https://html.spec.whatwg.org/multipage/dom.html#global-attributes>`__. :: 6198 // 6199 // attrs = {'asdf': 'table'} 6200 // 6201 // is *not* a valid attribute dictionary because 'asdf' is not a valid 6202 // HTML attribute even if it is a valid XML attribute. Valid HTML 4.01 6203 // table attributes can be found `here 6204 // <http://www.w3.org/TR/REC-html40/struct/tables.html#h-11.2>`__. A 6205 // working draft of the HTML 5 spec can be found `here 6206 // <https://html.spec.whatwg.org/multipage/tables.html>`__. It contains the 6207 // latest information on table attributes for the modern web. 6208 // 6209 // parse_dates : bool, optional 6210 // 6211 // See :func:`~read_csv` for more details. 6212 // 6213 // thousands : str, optional 6214 // 6215 // Separator to use to parse thousands. Defaults to ``','``. 6216 // 6217 // encoding : str, optional 6218 // 6219 // The encoding used to decode the web page. Defaults to ``None``.``None`` 6220 // preserves the previous encoding behavior, which depends on the 6221 // underlying parser library (e.g., the parser library will try to use 6222 // the encoding provided by the document). 6223 // 6224 // decimal : str, default '.' 6225 // 6226 // Character to recognize as decimal point (e.g. use ',' for European 6227 // data). 6228 // 6229 // converters : dict, default None 6230 // 6231 // Dict of functions for converting values in certain columns. Keys can 6232 // either be integers or column labels, values are functions that take one 6233 // input argument, the cell (not column) content, and return the 6234 // transformed content. 6235 // 6236 // na_values : iterable, default None 6237 // 6238 // Custom NA values. 6239 // 6240 // keep_default_na : bool, default True 6241 // 6242 // If na_values are specified and keep_default_na is False the default NaN 6243 // values are overridden, otherwise they're appended to. 6244 // 6245 // displayed_only : bool, default True 6246 // 6247 // Whether elements with "display: none" should be parsed. 6248 // 6249 // extract_links : {None, "all", "header", "body", "footer"} 6250 // 6251 // Table elements in the specified section(s) with <a> tags will have their 6252 // href extracted. 6253 // 6254 // .. versionadded:: 1.5.0 6255 // 6256 // dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable' 6257 // 6258 // Back-end data type applied to the resultant :class:`DataFrame` 6259 // (still experimental). Behaviour is as follows: 6260 // 6261 // * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` 6262 // (default). 6263 // * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` 6264 // DataFrame. 6265 // 6266 // .. 
versionadded:: 2.0 6267 // 6268 // storage_options : dict, optional 6269 // 6270 // Extra options that make sense for a particular storage connection, e.g. 6271 // host, port, username, password, etc. For HTTP(S) URLs the key-value pairs 6272 // are forwarded to ``urllib.request.Request`` as header options. For other 6273 // URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are 6274 // forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more 6275 // details, and for more examples on storage options refer `here 6276 // <https://pandas.pydata.org/docs/user_guide/io.html? 6277 // highlight=storage_options#reading-writing-remote-files>`_. 6278 // 6279 // .. versionadded:: 2.1.0 6280 // 6281 // Returns 6282 // ------- 6283 // dfs 6284 // 6285 // A list of DataFrames. 6286 // 6287 // See Also 6288 // -------- 6289 // read_csv : Read a comma-separated values (csv) file into DataFrame. 6290 // 6291 // Notes 6292 // ----- 6293 // Before using this function you should read the :ref:`gotchas about the 6294 // HTML parsing libraries <io.html.gotchas>`. 6295 // 6296 // Expect to do some cleanup after you call this function. For example, you 6297 // might need to manually assign column names if the column names are 6298 // converted to NaN when you pass the `header=0` argument. We try to assume as 6299 // little as possible about the structure of the table and push the 6300 // idiosyncrasies of the HTML contained in the table to the user. 6301 // 6302 // This function searches for “<table>“ elements and only for “<tr>“ 6303 // and “<th>“ rows and “<td>“ elements within each “<tr>“ or “<th>“ 6304 // element in the table. “<td>“ stands for "table data". This function 6305 // attempts to properly handle “colspan“ and “rowspan“ attributes. 6306 // If the function has a “<thead>“ argument, it is used to construct 6307 // the header, otherwise the function attempts to find the header within 6308 // the body (by putting rows with only “<th>“ elements into the header). 6309 // 6310 // Similar to :func:`~read_csv` the `header` argument is applied 6311 // **after** `skiprows` is applied. 6312 // 6313 // This function will *always* return a list of :class:`DataFrame` *or* 6314 // it will fail, e.g., it will *not* return an empty list. 6315 // 6316 // Examples 6317 // -------- 6318 // See the :ref:`read_html documentation in the IO section of the docs 6319 // <io.read_html>` for some examples of reading in HTML tables. 6320 // 6321 //go:linkname ReadHtml py.read_html 6322 func ReadHtml(io *py.Object) *py.Object 6323 6324 // Read XML document into a :class:`~pandas.DataFrame` object. 6325 // 6326 // .. versionadded:: 1.3.0 6327 // 6328 // Parameters 6329 // ---------- 6330 // path_or_buffer : str, path object, or file-like object 6331 // 6332 // String, path object (implementing ``os.PathLike[str]``), or file-like 6333 // object implementing a ``read()`` function. The string can be any valid XML 6334 // string or a path. The string can further be a URL. Valid URL schemes 6335 // include http, ftp, s3, and file. 6336 // 6337 // .. deprecated:: 2.1.0 6338 // Passing xml literal strings is deprecated. 6339 // Wrap literal xml input in ``io.StringIO`` or ``io.BytesIO`` instead. 6340 // 6341 // xpath : str, optional, default './\*' 6342 // 6343 // The ``XPath`` to parse required set of nodes for migration to 6344 // :class:`~pandas.DataFrame`.``XPath`` should return a collection of elements 6345 // and not a single element. 
Note: The ``etree`` parser supports limited ``XPath`` 6346 // expressions. For more complex ``XPath``, use ``lxml`` which requires 6347 // installation. 6348 // 6349 // namespaces : dict, optional 6350 // 6351 // The namespaces defined in XML document as dicts with key being 6352 // namespace prefix and value the URI. There is no need to include all 6353 // namespaces in XML, only the ones used in ``xpath`` expression. 6354 // Note: if XML document uses default namespace denoted as 6355 // `xmlns='<URI>'` without a prefix, you must assign any temporary 6356 // namespace prefix such as 'doc' to the URI in order to parse 6357 // underlying nodes and/or attributes. For example, :: 6358 // 6359 // namespaces = {"doc": "https://example.com"} 6360 // 6361 // elems_only : bool, optional, default False 6362 // 6363 // Parse only the child elements at the specified ``xpath``. By default, 6364 // all child elements and non-empty text nodes are returned. 6365 // 6366 // attrs_only : bool, optional, default False 6367 // 6368 // Parse only the attributes at the specified ``xpath``. 6369 // By default, all attributes are returned. 6370 // 6371 // names : list-like, optional 6372 // 6373 // Column names for DataFrame of parsed XML data. Use this parameter to 6374 // rename original element names and distinguish same named elements and 6375 // attributes. 6376 // 6377 // dtype : Type name or dict of column -> type, optional 6378 // 6379 // Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32, 6380 // 'c': 'Int64'} 6381 // Use `str` or `object` together with suitable `na_values` settings 6382 // to preserve and not interpret dtype. 6383 // If converters are specified, they will be applied INSTEAD 6384 // of dtype conversion. 6385 // 6386 // .. versionadded:: 1.5.0 6387 // 6388 // converters : dict, optional 6389 // 6390 // Dict of functions for converting values in certain columns. Keys can either 6391 // be integers or column labels. 6392 // 6393 // .. versionadded:: 1.5.0 6394 // 6395 // parse_dates : bool or list of int or names or list of lists or dict, default False 6396 // 6397 // Identifiers to parse index or columns to datetime. The behavior is as follows: 6398 // 6399 // * boolean. If True -> try parsing the index. 6400 // * list of int or names. e.g. If [1, 2, 3] -> try parsing columns 1, 2, 3 6401 // each as a separate date column. 6402 // * list of lists. e.g. If [[1, 3]] -> combine columns 1 and 3 and parse as 6403 // a single date column. 6404 // * dict, e.g. {'foo' : [1, 3]} -> parse columns 1, 3 as date and call 6405 // result 'foo' 6406 // 6407 // .. versionadded:: 1.5.0 6408 // 6409 // encoding : str, optional, default 'utf-8' 6410 // 6411 // Encoding of XML document. 6412 // 6413 // parser : {'lxml','etree'}, default 'lxml' 6414 // 6415 // Parser module to use for retrieval of data. Only 'lxml' and 6416 // 'etree' are supported. With 'lxml' more complex ``XPath`` searches 6417 // and ability to use XSLT stylesheet are supported. 6418 // 6419 // stylesheet : str, path object or file-like object 6420 // 6421 // A URL, file-like object, or a raw string containing an XSLT script. 6422 // This stylesheet should flatten complex, deeply nested XML documents 6423 // for easier parsing. To use this feature you must have ``lxml`` module 6424 // installed and specify 'lxml' as ``parser``. The ``xpath`` must 6425 // reference nodes of transformed XML document generated after XSLT 6426 // transformation and not the original XML document. 
Only XSLT 1.0 6427 // scripts and not later versions are currently supported. 6428 // 6429 // iterparse : dict, optional 6430 // 6431 // The nodes or attributes to retrieve in iterparsing of XML document 6432 // as a dict with key being the name of repeating element and value being 6433 // list of elements or attribute names that are descendants of the repeated 6434 // element. Note: If this option is used, it will replace ``xpath`` parsing 6435 // and unlike ``xpath``, descendants do not need to relate to each other but can 6436 // exist anywhere in the document under the repeating element. This memory- 6437 // efficient method should be used for very large XML files (500MB, 1GB, or 5GB+). 6438 // For example, :: 6439 // 6440 // iterparse = {"row_element": ["child_elem", "attr", "grandchild_elem"]} 6441 // 6442 // .. versionadded:: 1.5.0 6443 // 6444 // compression : str or dict, default 'infer' 6445 // 6446 // For on-the-fly decompression of on-disk data. If 'infer' and 'path_or_buffer' is 6447 // path-like, then detect compression from the following extensions: '.gz', 6448 // '.bz2', '.zip', '.xz', '.zst', '.tar', '.tar.gz', '.tar.xz' or '.tar.bz2' 6449 // (otherwise no compression). 6450 // If using 'zip' or 'tar', the ZIP file must contain only one data file to be read in. 6451 // Set to ``None`` for no decompression. 6452 // Can also be a dict with key ``'method'`` set 6453 // to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, ``'tar'``} and 6454 // other key-value pairs are forwarded to 6455 // ``zipfile.ZipFile``, ``gzip.GzipFile``, 6456 // ``bz2.BZ2File``, ``zstandard.ZstdDecompressor``, ``lzma.LZMAFile`` or 6457 // ``tarfile.TarFile``, respectively. 6458 // As an example, the following could be passed for Zstandard decompression using a 6459 // custom compression dictionary: 6460 // ``compression={'method': 'zstd', 'dict_data': my_compression_dict}``. 6461 // 6462 // .. versionadded:: 1.5.0 6463 // Added support for `.tar` files. 6464 // 6465 // .. versionchanged:: 1.4.0 Zstandard support. 6466 // 6467 // storage_options : dict, optional 6468 // 6469 // Extra options that make sense for a particular storage connection, e.g. 6470 // host, port, username, password, etc. For HTTP(S) URLs the key-value pairs 6471 // are forwarded to ``urllib.request.Request`` as header options. For other 6472 // URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are 6473 // forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more 6474 // details, and for more examples on storage options refer `here 6475 // <https://pandas.pydata.org/docs/user_guide/io.html? 6476 // highlight=storage_options#reading-writing-remote-files>`_. 6477 // 6478 // dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable' 6479 // 6480 // Back-end data type applied to the resultant :class:`DataFrame` 6481 // (still experimental). Behaviour is as follows: 6482 // 6483 // * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` 6484 // (default). 6485 // * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` 6486 // DataFrame. 6487 // 6488 // .. versionadded:: 2.0 6489 // 6490 // Returns 6491 // ------- 6492 // df 6493 // 6494 // A DataFrame. 6495 // 6496 // See Also 6497 // -------- 6498 // read_json : Convert a JSON string to pandas object. 6499 // read_html : Read HTML tables into a list of DataFrame objects.
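//
// As a brief illustration of the ``iterparse`` option described above
// (``very_large.xml`` is a hypothetical file; the call is sketched, not run):
//
// >>> df = pd.read_xml(
// ...     "very_large.xml",
// ...     iterparse={"row": ["shape", "degrees", "sides"]},
// ... )  # doctest: +SKIP
//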
6500 // 6501 // Notes 6502 // ----- 6503 // This method is best designed to import shallow XML documents in 6504 // following format which is the ideal fit for the two-dimensions of a 6505 // “DataFrame“ (row by column). :: 6506 // 6507 // <root> 6508 // <row> 6509 // <column1>data</column1> 6510 // <column2>data</column2> 6511 // <column3>data</column3> 6512 // ... 6513 // </row> 6514 // <row> 6515 // ... 6516 // </row> 6517 // ... 6518 // </root> 6519 // 6520 // As a file format, XML documents can be designed any way including 6521 // layout of elements and attributes as long as it conforms to W3C 6522 // specifications. Therefore, this method is a convenience handler for 6523 // a specific flatter design and not all possible XML structures. 6524 // 6525 // However, for more complex XML documents, “stylesheet“ allows you to 6526 // temporarily redesign original document with XSLT (a special purpose 6527 // language) for a flatter version for migration to a DataFrame. 6528 // 6529 // This function will *always* return a single :class:`DataFrame` or raise 6530 // exceptions due to issues with XML document, “xpath“, or other 6531 // parameters. 6532 // 6533 // See the :ref:`read_xml documentation in the IO section of the docs 6534 // <io.read_xml>` for more information in using this method to parse XML 6535 // files to DataFrames. 6536 // 6537 // Examples 6538 // -------- 6539 // >>> from io import StringIO 6540 // >>> xml = ”'<?xml version='1.0' encoding='utf-8'?> 6541 // ... <data xmlns="http://example.com"> 6542 // ... <row> 6543 // ... <shape>square</shape> 6544 // ... <degrees>360</degrees> 6545 // ... <sides>4.0</sides> 6546 // ... </row> 6547 // ... <row> 6548 // ... <shape>circle</shape> 6549 // ... <degrees>360</degrees> 6550 // ... <sides/> 6551 // ... </row> 6552 // ... <row> 6553 // ... <shape>triangle</shape> 6554 // ... <degrees>180</degrees> 6555 // ... <sides>3.0</sides> 6556 // ... </row> 6557 // ... </data>”' 6558 // 6559 // >>> df = pd.read_xml(StringIO(xml)) 6560 // >>> df 6561 // 6562 // shape degrees sides 6563 // 6564 // 0 square 360 4.0 6565 // 1 circle 360 NaN 6566 // 2 triangle 180 3.0 6567 // 6568 // >>> xml = ”'<?xml version='1.0' encoding='utf-8'?> 6569 // ... <data> 6570 // ... <row shape="square" degrees="360" sides="4.0"/> 6571 // ... <row shape="circle" degrees="360"/> 6572 // ... <row shape="triangle" degrees="180" sides="3.0"/> 6573 // ... </data>”' 6574 // 6575 // >>> df = pd.read_xml(StringIO(xml), xpath=".//row") 6576 // >>> df 6577 // 6578 // shape degrees sides 6579 // 6580 // 0 square 360 4.0 6581 // 1 circle 360 NaN 6582 // 2 triangle 180 3.0 6583 // 6584 // >>> xml = ”'<?xml version='1.0' encoding='utf-8'?> 6585 // ... <doc:data xmlns:doc="https://example.com"> 6586 // ... <doc:row> 6587 // ... <doc:shape>square</doc:shape> 6588 // ... <doc:degrees>360</doc:degrees> 6589 // ... <doc:sides>4.0</doc:sides> 6590 // ... </doc:row> 6591 // ... <doc:row> 6592 // ... <doc:shape>circle</doc:shape> 6593 // ... <doc:degrees>360</doc:degrees> 6594 // ... <doc:sides/> 6595 // ... </doc:row> 6596 // ... <doc:row> 6597 // ... <doc:shape>triangle</doc:shape> 6598 // ... <doc:degrees>180</doc:degrees> 6599 // ... <doc:sides>3.0</doc:sides> 6600 // ... </doc:row> 6601 // ... </doc:data>”' 6602 // 6603 // >>> df = pd.read_xml(StringIO(xml), 6604 // ... xpath="//doc:row", 6605 // ... 
namespaces={"doc": "https://example.com"}) 6606 // >>> df 6607 // 6608 // shape degrees sides 6609 // 6610 // 0 square 360 4.0 6611 // 1 circle 360 NaN 6612 // 2 triangle 180 3.0 6613 // 6614 // >>> xml_data = ”' 6615 // ... <data> 6616 // ... <row> 6617 // ... <index>0</index> 6618 // ... <a>1</a> 6619 // ... <b>2.5</b> 6620 // ... <c>True</c> 6621 // ... <d>a</d> 6622 // ... <e>2019-12-31 00:00:00</e> 6623 // ... </row> 6624 // ... <row> 6625 // ... <index>1</index> 6626 // ... <b>4.5</b> 6627 // ... <c>False</c> 6628 // ... <d>b</d> 6629 // ... <e>2019-12-31 00:00:00</e> 6630 // ... </row> 6631 // ... </data> 6632 // ... ”' 6633 // 6634 // >>> df = pd.read_xml(StringIO(xml_data), 6635 // ... dtype_backend="numpy_nullable", 6636 // ... parse_dates=["e"]) 6637 // >>> df 6638 // 6639 // index a b c d e 6640 // 6641 // 0 0 1 2.5 True a 2019-12-31 6642 // 1 1 <NA> 4.5 False b 2019-12-31 6643 // 6644 //go:linkname ReadXml py.read_xml 6645 func ReadXml(pathOrBuffer *py.Object) *py.Object 6646 6647 // Convert a JSON string to pandas object. 6648 // 6649 // Parameters 6650 // ---------- 6651 // path_or_buf : a valid JSON str, path object or file-like object 6652 // 6653 // Any valid string path is acceptable. The string could be a URL. Valid 6654 // URL schemes include http, ftp, s3, and file. For file URLs, a host is 6655 // expected. A local file could be: 6656 // ``file://localhost/path/to/table.json``. 6657 // 6658 // If you want to pass in a path object, pandas accepts any 6659 // ``os.PathLike``. 6660 // 6661 // By file-like object, we refer to objects with a ``read()`` method, 6662 // such as a file handle (e.g. via builtin ``open`` function) 6663 // or ``StringIO``. 6664 // 6665 // .. deprecated:: 2.1.0 6666 // Passing json literal strings is deprecated. 6667 // 6668 // orient : str, optional 6669 // 6670 // Indication of expected JSON string format. 6671 // Compatible JSON strings can be produced by ``to_json()`` with a 6672 // corresponding orient value. 6673 // The set of possible orients is: 6674 // 6675 // - ``'split'`` : dict like 6676 // ``{index -> [index], columns -> [columns], data -> [values]}`` 6677 // - ``'records'`` : list like 6678 // ``[{column -> value}, ... , {column -> value}]`` 6679 // - ``'index'`` : dict like ``{index -> {column -> value}}`` 6680 // - ``'columns'`` : dict like ``{column -> {index -> value}}`` 6681 // - ``'values'`` : just the values array 6682 // - ``'table'`` : dict like ``{'schema': {schema}, 'data': {data}}`` 6683 // 6684 // The allowed and default values depend on the value 6685 // of the `typ` parameter. 6686 // 6687 // * when ``typ == 'series'``, 6688 // 6689 // - allowed orients are ``{'split','records','index'}`` 6690 // - default is ``'index'`` 6691 // - The Series index must be unique for orient ``'index'``. 6692 // 6693 // * when ``typ == 'frame'``, 6694 // 6695 // - allowed orients are ``{'split','records','index', 6696 // 'columns','values', 'table'}`` 6697 // - default is ``'columns'`` 6698 // - The DataFrame index must be unique for orients ``'index'`` and 6699 // ``'columns'``. 6700 // - The DataFrame columns must be unique for orients ``'index'``, 6701 // ``'columns'``, and ``'records'``. 6702 // 6703 // typ : {'frame', 'series'}, default 'frame' 6704 // 6705 // The type of object to recover. 6706 // 6707 // dtype : bool or dict, default None 6708 // 6709 // If True, infer dtypes; if a dict of column to dtype, then use those; 6710 // if False, then don't infer dtypes at all, applies only to the data. 
6711 // 6712 // For all ``orient`` values except ``'table'``, default is True. 6713 // 6714 // convert_axes : bool, default None 6715 // 6716 // Try to convert the axes to the proper dtypes. 6717 // 6718 // For all ``orient`` values except ``'table'``, default is True. 6719 // 6720 // convert_dates : bool or list of str, default True 6721 // 6722 // If True then default datelike columns may be converted (depending on 6723 // keep_default_dates). 6724 // If False, no dates will be converted. 6725 // If a list of column names, then those columns will be converted and 6726 // default datelike columns may also be converted (depending on 6727 // keep_default_dates). 6728 // 6729 // keep_default_dates : bool, default True 6730 // 6731 // If parsing dates (convert_dates is not False), then try to parse the 6732 // default datelike columns. 6733 // A column label is datelike if 6734 // 6735 // * it ends with ``'_at'``, 6736 // 6737 // * it ends with ``'_time'``, 6738 // 6739 // * it begins with ``'timestamp'``, 6740 // 6741 // * it is ``'modified'``, or 6742 // 6743 // * it is ``'date'``. 6744 // 6745 // precise_float : bool, default False 6746 // 6747 // Set to enable usage of higher precision (strtod) function when 6748 // decoding string to double values. Default (False) is to use fast but 6749 // less precise builtin functionality. 6750 // 6751 // date_unit : str, default None 6752 // 6753 // The timestamp unit to detect if converting dates. The default behaviour 6754 // is to try and detect the correct precision, but if this is not desired 6755 // then pass one of 's', 'ms', 'us' or 'ns' to force parsing only seconds, 6756 // milliseconds, microseconds or nanoseconds respectively. 6757 // 6758 // encoding : str, default is 'utf-8' 6759 // 6760 // The encoding to use to decode py3 bytes. 6761 // 6762 // encoding_errors : str, optional, default "strict" 6763 // 6764 // How encoding errors are treated. `List of possible values 6765 // <https://docs.python.org/3/library/codecs.html#error-handlers>`_ . 6766 // 6767 // .. versionadded:: 1.3.0 6768 // 6769 // lines : bool, default False 6770 // 6771 // Read the file as a json object per line. 6772 // 6773 // chunksize : int, optional 6774 // 6775 // Return JsonReader object for iteration. 6776 // See the `line-delimited json docs 6777 // <https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#line-delimited-json>`_ 6778 // for more information on ``chunksize``. 6779 // This can only be passed if `lines=True`. 6780 // If this is None, the file will be read into memory all at once. 6781 // 6782 // compression : str or dict, default 'infer' 6783 // 6784 // For on-the-fly decompression of on-disk data. If 'infer' and 'path_or_buf' is 6785 // path-like, then detect compression from the following extensions: '.gz', 6786 // '.bz2', '.zip', '.xz', '.zst', '.tar', '.tar.gz', '.tar.xz' or '.tar.bz2' 6787 // (otherwise no compression). 6788 // If using 'zip' or 'tar', the ZIP file must contain only one data file to be read in. 6789 // Set to ``None`` for no decompression. 6790 // Can also be a dict with key ``'method'`` set 6791 // to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, ``'tar'``} and 6792 // other key-value pairs are forwarded to 6793 // ``zipfile.ZipFile``, ``gzip.GzipFile``, 6794 // ``bz2.BZ2File``, ``zstandard.ZstdDecompressor``, ``lzma.LZMAFile`` or 6795 // ``tarfile.TarFile``, respectively. 
6796 // As an example, the following could be passed for Zstandard decompression using a 6797 // custom compression dictionary: 6798 // ``compression={'method': 'zstd', 'dict_data': my_compression_dict}``. 6799 // 6800 // .. versionadded:: 1.5.0 6801 // Added support for `.tar` files. 6802 // 6803 // .. versionchanged:: 1.4.0 Zstandard support. 6804 // 6805 // nrows : int, optional 6806 // 6807 // The number of lines from the line-delimited jsonfile that has to be read. 6808 // This can only be passed if `lines=True`. 6809 // If this is None, all the rows will be returned. 6810 // 6811 // storage_options : dict, optional 6812 // 6813 // Extra options that make sense for a particular storage connection, e.g. 6814 // host, port, username, password, etc. For HTTP(S) URLs the key-value pairs 6815 // are forwarded to ``urllib.request.Request`` as header options. For other 6816 // URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are 6817 // forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more 6818 // details, and for more examples on storage options refer `here 6819 // <https://pandas.pydata.org/docs/user_guide/io.html? 6820 // highlight=storage_options#reading-writing-remote-files>`_. 6821 // 6822 // dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable' 6823 // 6824 // Back-end data type applied to the resultant :class:`DataFrame` 6825 // (still experimental). Behaviour is as follows: 6826 // 6827 // * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` 6828 // (default). 6829 // * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` 6830 // DataFrame. 6831 // 6832 // .. versionadded:: 2.0 6833 // 6834 // engine : {"ujson", "pyarrow"}, default "ujson" 6835 // 6836 // Parser engine to use. The ``"pyarrow"`` engine is only available when 6837 // ``lines=True``. 6838 // 6839 // .. versionadded:: 2.0 6840 // 6841 // Returns 6842 // ------- 6843 // Series, DataFrame, or pandas.api.typing.JsonReader 6844 // 6845 // A JsonReader is returned when ``chunksize`` is not ``0`` or ``None``. 6846 // Otherwise, the type returned depends on the value of ``typ``. 6847 // 6848 // See Also 6849 // -------- 6850 // DataFrame.to_json : Convert a DataFrame to a JSON string. 6851 // Series.to_json : Convert a Series to a JSON string. 6852 // json_normalize : Normalize semi-structured JSON data into a flat table. 6853 // 6854 // Notes 6855 // ----- 6856 // Specific to “orient='table'“, if a :class:`DataFrame` with a literal 6857 // :class:`Index` name of `index` gets written with :func:`to_json`, the 6858 // subsequent read operation will incorrectly set the :class:`Index` name to 6859 // “None“. This is because `index` is also used by :func:`DataFrame.to_json` 6860 // to denote a missing :class:`Index` name, and the subsequent 6861 // :func:`read_json` operation cannot distinguish between the two. The same 6862 // limitation is encountered with a :class:`MultiIndex` and any names 6863 // beginning with “'level_'“. 6864 // 6865 // Examples 6866 // -------- 6867 // >>> from io import StringIO 6868 // >>> df = pd.DataFrame([['a', 'b'], ['c', 'd']], 6869 // ... index=['row 1', 'row 2'], 6870 // ... 
columns=['col 1', 'col 2']) 6871 // 6872 // Encoding/decoding a Dataframe using “'split'“ formatted JSON: 6873 // 6874 // >>> df.to_json(orient='split') 6875 // 6876 // '{"columns":["col 1","col 2"],"index":["row 1","row 2"],"data":[["a","b"],["c","d"]]}' 6877 // 6878 // >>> pd.read_json(StringIO(_), orient='split') 6879 // 6880 // col 1 col 2 6881 // 6882 // row 1 a b 6883 // row 2 c d 6884 // 6885 // Encoding/decoding a Dataframe using “'index'“ formatted JSON: 6886 // 6887 // >>> df.to_json(orient='index') 6888 // '{"row 1":{"col 1":"a","col 2":"b"},"row 2":{"col 1":"c","col 2":"d"}}' 6889 // 6890 // >>> pd.read_json(StringIO(_), orient='index') 6891 // 6892 // col 1 col 2 6893 // 6894 // row 1 a b 6895 // row 2 c d 6896 // 6897 // Encoding/decoding a Dataframe using “'records'“ formatted JSON. 6898 // Note that index labels are not preserved with this encoding. 6899 // 6900 // >>> df.to_json(orient='records') 6901 // '[{"col 1":"a","col 2":"b"},{"col 1":"c","col 2":"d"}]' 6902 // >>> pd.read_json(StringIO(_), orient='records') 6903 // 6904 // col 1 col 2 6905 // 6906 // 0 a b 6907 // 1 c d 6908 // 6909 // # Encoding with Table Schema 6910 // 6911 // >>> df.to_json(orient='table') 6912 // 6913 // '{"schema":{"fields":[{"name":"index","type":"string"},{"name":"col 1","type":"string"},{"name":"col 2","type":"string"}],"primaryKey":["index"],"pandas_version":"1.4.0"},"data":[{"index":"row 1","col 1":"a","col 2":"b"},{"index":"row 2","col 1":"c","col 2":"d"}]}' 6914 // 6915 // The following example uses “dtype_backend="numpy_nullable"“ 6916 // 6917 // >>> data = ”'{"index": {"0": 0, "1": 1}, 6918 // ... "a": {"0": 1, "1": null}, 6919 // ... "b": {"0": 2.5, "1": 4.5}, 6920 // ... "c": {"0": true, "1": false}, 6921 // ... "d": {"0": "a", "1": "b"}, 6922 // ... "e": {"0": 1577.2, "1": 1577.1}}”' 6923 // >>> pd.read_json(StringIO(data), dtype_backend="numpy_nullable") 6924 // 6925 // index a b c d e 6926 // 6927 // 0 0 1 2.5 True a 1577.2 6928 // 1 1 <NA> 4.5 False b 1577.1 6929 // 6930 //go:linkname ReadJson py.read_json 6931 func ReadJson(pathOrBuf *py.Object) *py.Object 6932 6933 // Read Stata file into DataFrame. 6934 // 6935 // Parameters 6936 // ---------- 6937 // filepath_or_buffer : str, path object or file-like object 6938 // 6939 // Any valid string path is acceptable. The string could be a URL. Valid 6940 // URL schemes include http, ftp, s3, and file. For file URLs, a host is 6941 // expected. A local file could be: ``file://localhost/path/to/table.dta``. 6942 // 6943 // If you want to pass in a path object, pandas accepts any ``os.PathLike``. 6944 // 6945 // By file-like object, we refer to objects with a ``read()`` method, 6946 // such as a file handle (e.g. via builtin ``open`` function) 6947 // or ``StringIO``. 6948 // 6949 // convert_dates : bool, default True 6950 // 6951 // Convert date variables to DataFrame time values. 6952 // 6953 // convert_categoricals : bool, default True 6954 // 6955 // Read value labels and convert columns to Categorical/Factor variables. 6956 // 6957 // index_col : str, optional 6958 // 6959 // Column to set as index. 6960 // 6961 // convert_missing : bool, default False 6962 // 6963 // Flag indicating whether to convert missing values to their Stata 6964 // representations. If False, missing values are replaced with nan. 6965 // If True, columns containing missing values are returned with 6966 // object data types and missing values are represented by 6967 // StataMissingValue objects. 
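//
// For example, a sketched call (``missing.dta`` is a hypothetical file) that keeps
// Stata missing-value codes as ``StataMissingValue`` objects:
//
// >>> df = pd.read_stata("missing.dta", convert_missing=True)  # doctest: +SKIP
//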
6968 // 6969 // preserve_dtypes : bool, default True 6970 // 6971 // Preserve Stata datatypes. If False, numeric data are upcast to pandas 6972 // default types for foreign data (float64 or int64). 6973 // 6974 // columns : list or None 6975 // 6976 // Columns to retain. Columns will be returned in the given order. None 6977 // returns all columns. 6978 // 6979 // order_categoricals : bool, default True 6980 // 6981 // Flag indicating whether converted categorical data are ordered. 6982 // 6983 // chunksize : int, default None 6984 // 6985 // Return StataReader object for iterations, returns chunks with 6986 // given number of lines. 6987 // 6988 // iterator : bool, default False 6989 // 6990 // Return StataReader object. 6991 // 6992 // compression : str or dict, default 'infer' 6993 // 6994 // For on-the-fly decompression of on-disk data. If 'infer' and 'filepath_or_buffer' is 6995 // path-like, then detect compression from the following extensions: '.gz', 6996 // '.bz2', '.zip', '.xz', '.zst', '.tar', '.tar.gz', '.tar.xz' or '.tar.bz2' 6997 // (otherwise no compression). 6998 // If using 'zip' or 'tar', the ZIP file must contain only one data file to be read in. 6999 // Set to ``None`` for no decompression. 7000 // Can also be a dict with key ``'method'`` set 7001 // to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, ``'tar'``} and 7002 // other key-value pairs are forwarded to 7003 // ``zipfile.ZipFile``, ``gzip.GzipFile``, 7004 // ``bz2.BZ2File``, ``zstandard.ZstdDecompressor``, ``lzma.LZMAFile`` or 7005 // ``tarfile.TarFile``, respectively. 7006 // As an example, the following could be passed for Zstandard decompression using a 7007 // custom compression dictionary: 7008 // ``compression={'method': 'zstd', 'dict_data': my_compression_dict}``. 7009 // 7010 // .. versionadded:: 1.5.0 7011 // Added support for `.tar` files. 7012 // 7013 // storage_options : dict, optional 7014 // 7015 // Extra options that make sense for a particular storage connection, e.g. 7016 // host, port, username, password, etc. For HTTP(S) URLs the key-value pairs 7017 // are forwarded to ``urllib.request.Request`` as header options. For other 7018 // URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are 7019 // forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more 7020 // details, and for more examples on storage options refer `here 7021 // <https://pandas.pydata.org/docs/user_guide/io.html? 7022 // highlight=storage_options#reading-writing-remote-files>`_. 7023 // 7024 // Returns 7025 // ------- 7026 // DataFrame or pandas.api.typing.StataReader 7027 // 7028 // See Also 7029 // -------- 7030 // io.stata.StataReader : Low-level reader for Stata data files. 7031 // DataFrame.to_stata: Export Stata data files. 7032 // 7033 // Notes 7034 // ----- 7035 // Categorical variables read through an iterator may not have the same 7036 // categories and dtype. This occurs when a variable stored in a DTA 7037 // file is associated to an incomplete set of value labels that only 7038 // label a strict subset of the values. 7039 // 7040 // Examples 7041 // -------- 7042 // 7043 // # Creating a dummy stata for this example 7044 // 7045 // >>> df = pd.DataFrame({'animal': ['falcon', 'parrot', 'falcon', 'parrot'], 7046 // ... 
'speed': [350, 18, 361, 15]}) # doctest: +SKIP 7047 // >>> df.to_stata('animals.dta') # doctest: +SKIP 7048 // 7049 // Read a Stata dta file: 7050 // 7051 // >>> df = pd.read_stata('animals.dta') # doctest: +SKIP 7052 // 7053 // Read a Stata dta file in 10,000 line chunks: 7054 // 7055 // >>> values = np.random.randint(0, 10, size=(20_000, 1), dtype="uint8") # doctest: +SKIP 7056 // >>> df = pd.DataFrame(values, columns=["i"]) # doctest: +SKIP 7057 // >>> df.to_stata('filename.dta') # doctest: +SKIP 7058 // 7059 // >>> with pd.read_stata('filename.dta', chunksize=10000) as itr: # doctest: +SKIP 7060 // >>> for chunk in itr: 7061 // ... # Operate on a single chunk, e.g., chunk.mean() 7062 // ... pass # doctest: +SKIP 7063 // 7064 //go:linkname ReadStata py.read_stata 7065 func ReadStata(filepathOrBuffer *py.Object) *py.Object 7066 7067 // Read SAS files stored as either XPORT or SAS7BDAT format files. 7068 // 7069 // Parameters 7070 // ---------- 7071 // filepath_or_buffer : str, path object, or file-like object 7072 // 7073 // String, path object (implementing ``os.PathLike[str]``), or file-like 7074 // object implementing a binary ``read()`` function. The string could be a URL. 7075 // Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is 7076 // expected. A local file could be: 7077 // ``file://localhost/path/to/table.sas7bdat``. 7078 // 7079 // format : str {'xport', 'sas7bdat'} or None 7080 // 7081 // If None, file format is inferred from file extension. If 'xport' or 7082 // 'sas7bdat', uses the corresponding format. 7083 // 7084 // index : identifier of index column, defaults to None 7085 // 7086 // Identifier of column that should be used as index of the DataFrame. 7087 // 7088 // encoding : str, default is None 7089 // 7090 // Encoding for text data. If None, text data are stored as raw bytes. 7091 // 7092 // chunksize : int 7093 // 7094 // Read file `chunksize` lines at a time, returns iterator. 7095 // 7096 // iterator : bool, defaults to False 7097 // 7098 // If True, returns an iterator for reading the file incrementally. 7099 // 7100 // compression : str or dict, default 'infer' 7101 // 7102 // For on-the-fly decompression of on-disk data. If 'infer' and 'filepath_or_buffer' is 7103 // path-like, then detect compression from the following extensions: '.gz', 7104 // '.bz2', '.zip', '.xz', '.zst', '.tar', '.tar.gz', '.tar.xz' or '.tar.bz2' 7105 // (otherwise no compression). 7106 // If using 'zip' or 'tar', the ZIP file must contain only one data file to be read in. 7107 // Set to ``None`` for no decompression. 7108 // Can also be a dict with key ``'method'`` set 7109 // to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, ``'tar'``} and 7110 // other key-value pairs are forwarded to 7111 // ``zipfile.ZipFile``, ``gzip.GzipFile``, 7112 // ``bz2.BZ2File``, ``zstandard.ZstdDecompressor``, ``lzma.LZMAFile`` or 7113 // ``tarfile.TarFile``, respectively. 7114 // As an example, the following could be passed for Zstandard decompression using a 7115 // custom compression dictionary: 7116 // ``compression={'method': 'zstd', 'dict_data': my_compression_dict}``. 7117 // 7118 // .. versionadded:: 1.5.0 7119 // Added support for `.tar` files. 
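//
// A sketched example of incremental reading with ``chunksize``
// (``large.sas7bdat`` is a hypothetical file):
//
// >>> with pd.read_sas("large.sas7bdat", chunksize=10000) as reader:  # doctest: +SKIP
// ...     for chunk in reader:
// ...         print(chunk.shape)
//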
7120 // 7121 // Returns 7122 // ------- 7123 // DataFrame if iterator=False and chunksize=None, else SAS7BDATReader 7124 // or XportReader 7125 // 7126 // Examples 7127 // -------- 7128 // >>> df = pd.read_sas("sas_data.sas7bdat") # doctest: +SKIP 7129 // 7130 //go:linkname ReadSas py.read_sas 7131 func ReadSas(filepathOrBuffer *py.Object) *py.Object 7132 7133 // Load an SPSS file from the file path, returning a DataFrame. 7134 // 7135 // Parameters 7136 // ---------- 7137 // path : str or Path 7138 // 7139 // File path. 7140 // 7141 // usecols : list-like, optional 7142 // 7143 // Return a subset of the columns. If None, return all columns. 7144 // 7145 // convert_categoricals : bool, default is True 7146 // 7147 // Convert categorical columns into pd.Categorical. 7148 // 7149 // dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable' 7150 // 7151 // Back-end data type applied to the resultant :class:`DataFrame` 7152 // (still experimental). Behaviour is as follows: 7153 // 7154 // * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` 7155 // (default). 7156 // * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` 7157 // DataFrame. 7158 // 7159 // .. versionadded:: 2.0 7160 // 7161 // Returns 7162 // ------- 7163 // DataFrame 7164 // 7165 // Examples 7166 // -------- 7167 // >>> df = pd.read_spss("spss_data.sav") # doctest: +SKIP 7168 // 7169 //go:linkname ReadSpss py.read_spss 7170 func ReadSpss(path *py.Object, usecols *py.Object, convertCategoricals *py.Object, dtypeBackend *py.Object) *py.Object 7171 7172 // Normalize semi-structured JSON data into a flat table. 7173 // 7174 // Parameters 7175 // ---------- 7176 // data : dict or list of dicts 7177 // 7178 // Unserialized JSON objects. 7179 // 7180 // record_path : str or list of str, default None 7181 // 7182 // Path in each object to list of records. If not passed, data will be 7183 // assumed to be an array of records. 7184 // 7185 // meta : list of paths (str or list of str), default None 7186 // 7187 // Fields to use as metadata for each record in resulting table. 7188 // 7189 // meta_prefix : str, default None 7190 // 7191 // If True, prefix records with dotted (?) path, e.g. foo.bar.field if 7192 // meta is ['foo', 'bar']. 7193 // 7194 // record_prefix : str, default None 7195 // 7196 // If True, prefix records with dotted (?) path, e.g. foo.bar.field if 7197 // path to records is ['foo', 'bar']. 7198 // 7199 // errors : {'raise', 'ignore'}, default 'raise' 7200 // 7201 // Configures error handling. 7202 // 7203 // * 'ignore' : will ignore KeyError if keys listed in meta are not 7204 // always present. 7205 // * 'raise' : will raise KeyError if keys listed in meta are not 7206 // always present. 7207 // 7208 // sep : str, default '.' 7209 // 7210 // Nested records will generate names separated by sep. 7211 // e.g., for sep='.', {'foo': {'bar': 0}} -> foo.bar. 7212 // 7213 // max_level : int, default None 7214 // 7215 // Max number of levels(depth of dict) to normalize. 7216 // if None, normalizes all levels. 7217 // 7218 // Returns 7219 // ------- 7220 // frame : DataFrame 7221 // Normalize semi-structured JSON data into a flat table. 7222 // 7223 // Examples 7224 // -------- 7225 // >>> data = [ 7226 // ... {"id": 1, "name": {"first": "Coleen", "last": "Volk"}}, 7227 // ... {"name": {"given": "Mark", "family": "Regner"}}, 7228 // ... {"id": 2, "name": "Faye Raker"}, 7229 // ... 
] 7230 // >>> pd.json_normalize(data) 7231 // 7232 // id name.first name.last name.given name.family name 7233 // 7234 // 0 1.0 Coleen Volk NaN NaN NaN 7235 // 1 NaN NaN NaN Mark Regner NaN 7236 // 2 2.0 NaN NaN NaN NaN Faye Raker 7237 // 7238 // >>> data = [ 7239 // ... { 7240 // ... "id": 1, 7241 // ... "name": "Cole Volk", 7242 // ... "fitness": {"height": 130, "weight": 60}, 7243 // ... }, 7244 // ... {"name": "Mark Reg", "fitness": {"height": 130, "weight": 60}}, 7245 // ... { 7246 // ... "id": 2, 7247 // ... "name": "Faye Raker", 7248 // ... "fitness": {"height": 130, "weight": 60}, 7249 // ... }, 7250 // ... ] 7251 // >>> pd.json_normalize(data, max_level=0) 7252 // 7253 // id name fitness 7254 // 7255 // 0 1.0 Cole Volk {'height': 130, 'weight': 60} 7256 // 1 NaN Mark Reg {'height': 130, 'weight': 60} 7257 // 2 2.0 Faye Raker {'height': 130, 'weight': 60} 7258 // 7259 // Normalizes nested data up to level 1. 7260 // 7261 // >>> data = [ 7262 // ... { 7263 // ... "id": 1, 7264 // ... "name": "Cole Volk", 7265 // ... "fitness": {"height": 130, "weight": 60}, 7266 // ... }, 7267 // ... {"name": "Mark Reg", "fitness": {"height": 130, "weight": 60}}, 7268 // ... { 7269 // ... "id": 2, 7270 // ... "name": "Faye Raker", 7271 // ... "fitness": {"height": 130, "weight": 60}, 7272 // ... }, 7273 // ... ] 7274 // >>> pd.json_normalize(data, max_level=1) 7275 // 7276 // id name fitness.height fitness.weight 7277 // 7278 // 0 1.0 Cole Volk 130 60 7279 // 1 NaN Mark Reg 130 60 7280 // 2 2.0 Faye Raker 130 60 7281 // 7282 // >>> data = [ 7283 // ... { 7284 // ... "state": "Florida", 7285 // ... "shortname": "FL", 7286 // ... "info": {"governor": "Rick Scott"}, 7287 // ... "counties": [ 7288 // ... {"name": "Dade", "population": 12345}, 7289 // ... {"name": "Broward", "population": 40000}, 7290 // ... {"name": "Palm Beach", "population": 60000}, 7291 // ... ], 7292 // ... }, 7293 // ... { 7294 // ... "state": "Ohio", 7295 // ... "shortname": "OH", 7296 // ... "info": {"governor": "John Kasich"}, 7297 // ... "counties": [ 7298 // ... {"name": "Summit", "population": 1234}, 7299 // ... {"name": "Cuyahoga", "population": 1337}, 7300 // ... ], 7301 // ... }, 7302 // ... ] 7303 // >>> result = pd.json_normalize( 7304 // ... data, "counties", ["state", "shortname", ["info", "governor"]] 7305 // ... ) 7306 // >>> result 7307 // 7308 // name population state shortname info.governor 7309 // 7310 // 0 Dade 12345 Florida FL Rick Scott 7311 // 1 Broward 40000 Florida FL Rick Scott 7312 // 2 Palm Beach 60000 Florida FL Rick Scott 7313 // 3 Summit 1234 Ohio OH John Kasich 7314 // 4 Cuyahoga 1337 Ohio OH John Kasich 7315 // 7316 // >>> data = {"A": [1, 2]} 7317 // >>> pd.json_normalize(data, "A", record_prefix="Prefix.") 7318 // 7319 // Prefix.0 7320 // 7321 // 0 1 7322 // 1 2 7323 // 7324 // Returns normalized data with columns prefixed with the given string. 7325 // 7326 //go:linkname JsonNormalize py.json_normalize 7327 func JsonNormalize(data *py.Object, recordPath *py.Object, meta *py.Object, metaPrefix *py.Object, recordPrefix *py.Object, errors *py.Object, sep *py.Object, maxLevel *py.Object) *py.Object 7328 7329 // Run the pandas test suite using pytest. 7330 // 7331 // By default, runs with the marks -m "not slow and not network and not db" 7332 // 7333 // Parameters 7334 // ---------- 7335 // extra_args : list[str], default None 7336 // 7337 // Extra marks to run the tests. 7338 // 7339 // run_doctests : bool, default False 7340 // 7341 // Whether to only run the Python and Cython doctests. 
If you would like to run 7342 // both the doctests and the regular tests, append "--doctest-modules"/"--doctest-cython" 7343 // to extra_args. 7344 // 7345 // Examples 7346 // -------- 7347 // >>> pd.test() # doctest: +SKIP 7348 // running: pytest... 7349 // 7350 //go:linkname Test py.test 7351 func Test(extraArgs *py.Object, runDoctests *py.Object) *py.Object
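
// An additional sketched invocation of the test runner documented above; the pytest
// marker expression passed through ``extra_args`` is only an example:
//
// >>> pd.test(extra_args=["-m not slow"])  # doctest: +SKIP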