github.com/google/osv-scalibr@v0.4.1/clients/datasource/pypi_registry.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package datasource
    16  
    17  import (
    18  	"context"
    19  	"encoding/json"
    20  	"fmt"
    21  	"io"
    22  	"net/http"
    23  	"net/url"
    24  	"os"
    25  	"path"
    26  	"path/filepath"
    27  	"slices"
    28  	"strings"
    29  	"sync"
    30  	"time"
    31  
    32  	"github.com/google/osv-scalibr/clients/internal/pypi"
    33  	"github.com/google/osv-scalibr/log"
    34  )
    35  
    36  // pyPIAPI holds the base of the URL of PyPI Index API.
    37  const pyPIAPI = "https://pypi.org/simple"
    38  
    39  // PyPIRegistryAPIClient defines a client to fetch metadata from a PyPI registry.
    40  // TODO(#541): support multiple registries and authentication
    41  type PyPIRegistryAPIClient struct {
    42  	registry      string
    43  	localRegistry string
    44  
    45  	// Cache fields
    46  	mu             *sync.Mutex
    47  	cacheTimestamp *time.Time // If set, this means we loaded from a cache
    48  	responses      *RequestCache[string, response]
    49  }
    50  
    51  // NewPyPIRegistryAPIClient returns a new PyPIRegistryAPIClient.
    52  func NewPyPIRegistryAPIClient(registry string, localRegistry string) *PyPIRegistryAPIClient {
    53  	if registry == "" {
    54  		registry = pyPIAPI
    55  	}
    56  	if localRegistry != "" {
    57  		localRegistry = filepath.Join(localRegistry, "pypi")
    58  	}
    59  	return &PyPIRegistryAPIClient{
    60  		registry:      registry,
    61  		localRegistry: localRegistry,
    62  		mu:            &sync.Mutex{},
    63  		responses:     NewRequestCache[string, response](),
    64  	}
    65  }
    66  
    67  // SetLocalRegistry sets the local directory that stores the downloaded PyPI manifests.
    68  func (p *PyPIRegistryAPIClient) SetLocalRegistry(localRegistry string) {
    69  	if localRegistry != "" {
    70  		localRegistry = filepath.Join(localRegistry, "pypi")
    71  	}
    72  	p.localRegistry = localRegistry
    73  }
    74  
    75  // GetIndex queries the simple API index for a given project.
    76  func (p *PyPIRegistryAPIClient) GetIndex(ctx context.Context, project string) (pypi.IndexResponse, error) {
    77  	reqPath, err := url.JoinPath(p.registry, project)
    78  	if err != nil {
    79  		return pypi.IndexResponse{}, err
    80  	}
    81  
    82  	// The Index API requires an ending slash.
    83  	if !strings.HasSuffix(reqPath, "/") {
    84  		reqPath += "/"
    85  	}
    86  
    87  	var indexResp pypi.IndexResponse
    88  	resp, err := p.get(ctx, reqPath, true)
    89  	if err != nil {
    90  		return pypi.IndexResponse{}, err
    91  	}
    92  	err = json.Unmarshal(resp, &indexResp)
    93  	return indexResp, err
    94  }
    95  
    96  // GetFile retrieves the content of a file from the registry at the given URL.
    97  func (p *PyPIRegistryAPIClient) GetFile(ctx context.Context, url string) ([]byte, error) {
    98  	return p.get(ctx, url, false)
    99  }
   100  
   101  // urlToPath converts a URL to a file path.
   102  func urlToPath(rawURL string) string {
   103  	parsedURL, err := url.Parse(rawURL)
   104  	if err != nil {
   105  		log.Warnf("Error parsing URL %s: %s", rawURL, err)
   106  		return ""
   107  	}
   108  	return path.Join(parsedURL.Hostname(), parsedURL.Path)
   109  }
   110  
   111  func (p *PyPIRegistryAPIClient) get(ctx context.Context, url string, queryIndex bool) ([]byte, error) {
   112  	file := ""
   113  	urlPath := urlToPath(url)
   114  	if urlPath != "" && p.localRegistry != "" {
   115  		file = filepath.Join(p.localRegistry, urlPath)
   116  		if content, err := os.ReadFile(file); err == nil {
   117  			// We can still fetch the file from upstream if error is not nil.
   118  			return content, nil
   119  		}
   120  	}
   121  
   122  	resp, err := p.responses.Get(url, func() (response, error) {
   123  		req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
   124  		if err != nil {
   125  			return response{}, err
   126  		}
   127  
   128  		if queryIndex {
   129  			req.Header.Set("Accept", "application/vnd.pypi.simple.v1+json")
   130  		}
   131  		resp, err := http.DefaultClient.Do(req)
   132  		if err != nil {
   133  			return response{}, fmt.Errorf("%w: PyPI registry query failed: %w", errAPIFailed, err)
   134  		}
   135  		defer resp.Body.Close()
   136  
   137  		if !slices.Contains([]int{http.StatusOK, http.StatusNotFound, http.StatusUnauthorized}, resp.StatusCode) {
   138  			// Only cache responses with Status OK, NotFound, or Unauthorized
   139  			return response{}, fmt.Errorf("%w: PyPI registry query status: %d", errAPIFailed, resp.StatusCode)
   140  		}
   141  
   142  		b, err := io.ReadAll(resp.Body)
   143  		if err != nil {
   144  			return response{}, fmt.Errorf("failed to read body: %w", err)
   145  		}
   146  
   147  		if file != "" {
   148  			if err := writeFile(file, b); err != nil {
   149  				log.Warnf("failed to write response of %s: %v", url, err)
   150  			}
   151  		}
   152  
   153  		return response{StatusCode: resp.StatusCode, Body: b}, nil
   154  	})
   155  	if err != nil {
   156  		return nil, err
   157  	}
   158  
   159  	if resp.StatusCode != http.StatusOK {
   160  		return nil, fmt.Errorf("%w: PyPI registry query status: %d", errAPIFailed, resp.StatusCode)
   161  	}
   162  
   163  	return resp.Body, nil
   164  }