github.com/apache/beam/sdks/v2@v2.48.2/go/examples/fhirio/read_write_pubsub/read_write_pubsub.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one or more
     2  // contributor license agreements.  See the NOTICE file distributed with
     3  // this work for additional information regarding copyright ownership.
     4  // The ASF licenses this file to You under the Apache License, Version 2.0
     5  // (the "License"); you may not use this file except in compliance with
     6  // the License.  You may obtain a copy of the License at
     7  //
     8  //    http://www.apache.org/licenses/LICENSE-2.0
     9  //
    10  // Unless required by applicable law or agreed to in writing, software
    11  // distributed under the License is distributed on an "AS IS" BASIS,
    12  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  // See the License for the specific language governing permissions and
    14  // limitations under the License.
    15  
    16  // read_write_pubsub is a pipeline example using the fhirio connector to read
    17  // FHIR resources from GCS, write them to a GCP FHIR store, and, if a PubSub
    18  // topic is provided, read the written resources from the FHIR store and log them
    19  // based on the PubSub notifications about store updates.
    20  //
    21  // Pre-requisites:
    22  // 1. NDJSON-encoded FHIR resources stored in GCS.
    23  // 2. Dataflow Runner enabled: https://cloud.google.com/dataflow/docs/quickstarts.
    24  // 3. A Google Cloud FHIR store. Optionally, PubSub notifications set up on the store.
    25  // (see: https://cloud.google.com/healthcare-api/docs/concepts/pubsub).
    26  //
    27  // Running this pipeline requires providing a fully qualified GCS address
    28  // (potentially containing wildcards) to where your FHIR resources are stored, a
    29  // path to the FHIR store where the resources should be written to, and,
    30  // optionally, the PubSub topic name your FHIR store is sending notifications to,
    31  // in addition to the usual flags for the Dataflow runner.
    32  //
    33  // An example command for executing this pipeline on GCP is as follows:
    34  //
    35  //	export PROJECT="$(gcloud config get-value project)"
    36  //	export TEMP_LOCATION="gs://MY-BUCKET/temp"
    37  //	export STAGING_LOCATION="gs://MY-BUCKET/staging"
    38  //	export REGION="us-central1"
    39  //	export SOURCE_GCS_LOCATION="gs://MY_BUCKET/path/to/resources/**"
    40  //	export FHIR_STORE_PATH="MY_FHIR_STORE_PATH"
    41  //	export PUBSUB_TOPIC="MY_FHIR_STORE_TOPIC"
    42  //	cd ./sdks/go
    43  //	go run ./examples/fhirio/read_write_pubsub/read_write_pubsub.go \
    44  //	  --runner=dataflow \
    45  //	  --temp_location=$TEMP_LOCATION \
    46  //	  --staging_location=$STAGING_LOCATION \
    47  //	  --project=$PROJECT \
    48  //	  --region=$REGION \
    49  //	  --worker_harness_container_image=apache/beam_go_sdk:latest \
    50  //	  --sourceGcsLocation=$SOURCE_GCS_LOCATION \
    51  //	  --fhirStore=$FHIR_STORE_PATH \
    52  //	  --pubsubTopic=$PUBSUB_TOPIC
    53  package main
    54  
    55  import (
    56  	"context"
    57  	"encoding/json"
    58  	"flag"
    59  	"fmt"
    60  	"strings"
    61  	"time"
    62  
    63  	"github.com/apache/beam/sdks/v2/go/pkg/beam"
    64  	"github.com/apache/beam/sdks/v2/go/pkg/beam/io/fhirio"
    65  	"github.com/apache/beam/sdks/v2/go/pkg/beam/io/pubsubio"
    66  	"github.com/apache/beam/sdks/v2/go/pkg/beam/io/textio"
    67  	"github.com/apache/beam/sdks/v2/go/pkg/beam/log"
    68  	"github.com/apache/beam/sdks/v2/go/pkg/beam/options/gcpopts"
    69  	"github.com/apache/beam/sdks/v2/go/pkg/beam/register"
    70  	"github.com/apache/beam/sdks/v2/go/pkg/beam/x/beamx"
    71  )
    72  
var (
	// sourceGcsLocation is a required flag with the source directory for the
	// GCS files to read, including wildcards. The directory should contain the
	// resource files in NDJSON format.
	sourceGcsLocation = flag.String("sourceGcsLocation", "", "The source directory for GCS files to read, including wildcards.")

	// fhirStore is a required flag with the target FHIR store to write data to.
	// It must be of the full format:
	// "projects/project_id/locations/location/datasets/DATASET_ID/fhirStores/FHIR_STORE_ID"
	fhirStore = flag.String("fhirStore", "", "The target FHIR Store to write data to, must be of the full format.")

	// pubsubTopic is an optional flag with the PubSub topic of your FHIR store.
	// When set, resources are read back from the store and logged upon store
	// updates.
	pubsubTopic = flag.String("pubsubTopic", "", "The PubSub topic to listen to.")
)
    85  
    86  func init() {
    87  	register.Function1x1[string, string](WrapInBundle)
    88  	register.DoFn2x0[context.Context, string](&LoggerFn{})
    89  }
    90  
    91  // WrapInBundle takes a FHIR resource string and wraps it as a Bundle resource.
    92  // Useful so we can publish the given resource through ExecuteBundles.
    93  func WrapInBundle(resource string) string {
    94  	var r struct {
    95  		ResourceType string `json:"resourceType"`
    96  	}
    97  	json.NewDecoder(strings.NewReader(resource)).Decode(&r)
    98  	return fmt.Sprintf(`{
    99          "resourceType": "Bundle",
   100          "type": "batch",
   101          "entry": [
   102          	{
   103          		"request": {
   104          			"method": "POST",
   105          			"url": "%s"
   106          		},
   107          		"resource": %s
   108          	}
   109  		]
   110  	}`, r.ResourceType, resource)
   111  }
   112  
   113  // LoggerFn is a helper DoFn to log elements received.
   114  type LoggerFn struct {
   115  	LogPrefix string
   116  }
   117  
   118  // ProcessElement logs each element it receives.
   119  func (fn *LoggerFn) ProcessElement(ctx context.Context, elm string) {
   120  	log.Infof(ctx, "%s: %v", fn.LogPrefix, elm)
   121  }
   122  
   123  // FinishBundle waits a bit so the job server finishes receiving logs.
   124  func (fn *LoggerFn) FinishBundle() {
   125  	time.Sleep(2 * time.Second)
   126  }
   127  
   128  func main() {
   129  	flag.Parse()
   130  	beam.Init()
   131  
   132  	p, s := beam.NewPipelineWithRoot()
   133  
   134  	// Read resources from GCS.
   135  	resourcesInGcs := textio.Read(s, *sourceGcsLocation)
   136  	resourceBundles := beam.ParDo(s, WrapInBundle, resourcesInGcs)
   137  
   138  	// Write resources to store.
   139  	_, failedWritesErrorMessage := fhirio.ExecuteBundles(s, *fhirStore, resourceBundles)
   140  	beam.ParDo0(s, &LoggerFn{"Failed Write"}, failedWritesErrorMessage)
   141  
   142  	if *pubsubTopic != "" {
   143  		// PubSub notifications will be emitted containing the path of the resource once
   144  		// it is written to the store. Simultaneously read notifications and resources
   145  		// from PubSub and store, respectively.
   146  		resourceNotifications := pubsubio.Read(s, *gcpopts.Project, *pubsubTopic, nil)
   147  		resourcesInFhirStore, deadLetters := fhirio.Read(s, resourceNotifications)
   148  
   149  		// Log the read resources or read errors to the server.
   150  		beam.ParDo0(s, &LoggerFn{"Read Resource"}, resourcesInFhirStore)
   151  		beam.ParDo0(s, &LoggerFn{"Got Dead Letter"}, deadLetters)
   152  	}
   153  
   154  	ctx := context.Background()
   155  	if err := beamx.Run(ctx, p); err != nil {
   156  		log.Fatalf(ctx, "Failed to execute job: %v", err)
   157  	}
   158  }