github.com/biogo/biogo@v1.0.4/feat/genome/parse.karyotype (about)

     1  #!/bin/bash
     2  
# This script parses a UCSC Chromosome Band table into a Go source code file.
     4  #
     5  # This script will only work on the Chromosome Band table - it will not
     6  # work on an 'Assembly' or 'Scaffold' table.
     7  #
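# Arguments, in order: file prefix species package.
# The prefix, e.g. chr, is used to label the chromosomes (e.g. chr1, chr2, ...).
# If the prefix is empty, "chr" is used. The species is named in the comment of
# the generated package, and the package is used to name the generated package.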
    10  #
    11  # To download data tables, see http://genome.ucsc.edu/cgi-bin/hgTables
    12  
    13  file=$1
    14  prefix=$2
    15  species=$3
    16  package=$4
    17  
    18  if [ ! -n "$file" ]; then
    19  	echo "Please specify the UCSC karyotype table file"
    20  	exit
    21  fi
    22  
    23  if [ ! -n "$prefix" ]; then
    24  	prefix="chr"
    25  fi
    26  
    27  label="$(tr '[:lower:]' '[:upper:]' <<< ${prefix:0:1})${prefix:1}"
    28  
    29  (
    30  	echo -e "// DO NOT EDIT. This file was autogenerated by parse.karyotype\n"
    31  	echo "// Package $package defines chromosome and band intervals for the $species karyotype based on the $package assembly."
    32  	echo -e "package $package\n"
    33  
    34  	echo -e "import (\n\t\"github.com/biogo/biogo/feat\"\n\t\"github.com/biogo/biogo/feat/genome\"\n)\n"
    35  
    36  	# # chromosomes
    37  	echo 'var ('
    38  	< $file zcat \
    39  	| grep -v '^#' \
    40  	| sed -e 's/\t/ /g' -e 's/chr//' | tr -s ' ' \
    41  	| awk '{print $1,$0}' \
    42  	| sed -e 's/^[XZ]/1.1e10/' -e 's/^[YW]/1.2e10/' -e 's/^M/1.3e10/' \
    43  	| sed -e 's/^\([1-9][0-9]*\)[lL]/\1/' -e 's/^\([1-9][0-9]*\)[rR]/\1.5/' \
    44  	| sort -k1,1g -k3rn,3 \
    45  	| sort -u -k1g,1 \
    46  	| awk -v prefix=$prefix -v label=$label '{print "\t"label$2" = genome.Chromosome{Chr: \""prefix$2"\", Desc: \"Chromosome\", Length:",$4"}"}'
    47  	echo -e ')\n'
    48  	echo 'var Chromosomes = []*genome.Chromosome{'
    49  	< $file zcat \
    50  	| grep -v '^#' \
    51  	| sed -e 's/\t/ /g' -e 's/chr//' | tr -s ' ' \
    52  	| awk '{print $1,$0}' \
    53  	| sed -e 's/^[XZ]/1.1e10/' -e 's/^[YW]/1.2e10/' -e 's/^M/1.3e10/' \
    54  	| sed -e 's/^\([1-9][0-9]*\)[lL]/\1/' -e 's/^\([1-9][0-9]*\)[rR]/\1.5/' \
    55  	| sort -k1,1g -k3rn,3 \
    56  	| sort -u -k1g,1 \
    57  	| awk  -v label=$label '{print "\t&"label$2","}'
    58  	echo -e '}\n'
    59  
    60  	# bands
    61  	echo 'var ('
    62  	< $file zcat \
    63  	| grep -v '^#' \
    64  	| sed -e 's/\t/ /g' -e 's/chr//' | tr -s ' ' \
    65  	| awk '{print $1,$0}' \
    66  	| sed -e 's/^[XZ]/1.1e10/' -e 's/^[YW]/1.2e10/' -e 's/^M/1.3e10/' \
    67  	| sed -e 's/^\([1-9][0-9]*\)[lL]/\1/' -e 's/^\([1-9][0-9]*\)[rR]/\1.5/' \
    68  	| sort -k1,1g -k3n,3 \
    69  	| awk  -v label=$label '{print "\t"label$2"_"$5" = genome.Band{Band: \""$5"\", Desc: \"Band\", StartPos:",$3", EndPos:",$4", Giemsa: \""$6"\", Chr: &"label$2"}"}' \
    70  	| sed 's/\.\(.*=\)/_\1/'
    71  	echo -e ')\n'
    72  	echo 'var Bands = []*genome.Band{'
    73  	< $file zcat \
    74  	| grep -v '^#' \
    75  	| sed -e 's/\t/ /g' -e 's/chr//' | tr -s ' ' \
    76  	| awk '{print $1,$0}' \
    77  	| sed -e 's/^[XZ]/1.1e10/' -e 's/^[YW]/1.2e10/' -e 's/^M/1.3e10/' \
    78  	| sed -e 's/^\([1-9][0-9]*\)[lL]/\1/' -e 's/^\([1-9][0-9]*\)[rR]/\1.5/' \
    79  	| sort -k1,1g -k3n,3 \
    80  	| awk  -v label=$label '{print "\t&"label$2"_"$5","}' \
    81  	| sed 's/\./_/'
    82  	echo '}'
    83  
    84  	# init
    85  	cat << 'END'
    86  //line parse.karyotype:82
    87  func init() {
    88  	for _, b := range Bands {
    89  		b.Chr.(*genome.Chromosome).Features = append(b.Chr.(*genome.Chromosome).Features, b)
    90  	}
    91  	for _, c := range Chromosomes {
    92  		bc := make([]feat.Feature, len(c.Features))
    93  		copy(bc, c.Features)
    94  		c.Features = bc
    95  	}
    96  }
    97  END
    98  ) | gofmt