github.com/biogo/biogo@v1.0.4/feat/genome/parse.assembly (about)

     1  #!/bin/bash
     2  
     3  # This file parses UCSC Chromosome Band table into a Go source code file.
     4  #
     5  # This script will only work on the Assembly table.
     6  #
     7  # The prefix, e.g. chr, will be used to label the chromosomes (e.g. chr1, chr2 ... )
     8  # By default, "chr" is used. The package will be used to name the generated package.
     9  #
    10  # To download data tables, see http://genome.ucsc.edu/cgi-bin/hgTables
    11  #
    12  # USE OF THIS SCRIPT WITHOUT A FILTER OR WITH NOFRAG UNSET
    13  # SHOULD IN MOST CASES BE SEEN AS COMPILER ABUSE.
    14  
    15  file=$1
    16  prefix=$2
    17  species=$3
    18  package=$4
    19  filter=$5
    20  nofrags=$6
    21  
    22  if [ -z "$file" ]; then
    23  	echo "Please specify the UCSC assembly table file"
    24  	exit
    25  fi
    26  
    27  if [ -z "$prefix" ]; then
    28  	prefix="chr"
    29  fi
    30  
    31  if [ -z "$filter" ]; then
    32  	filter="^$"
    33  fi
    34  
    35  label="$(tr '[:lower:]' '[:upper:]' <<< ${prefix:0:1})${prefix:1}"
    36  
    37  (
    38  	echo -e "// DO NOT EDIT. This file was autogenerated by parse.assembly\n"
    39  	echo "// Package $package defines chromosome and assembly fragment intervals for the $package genome assembly for $species."
    40  	echo -e "package $package\n"
    41  
    42  	echo "import ("
    43  	if [ -z "$nofrags" ]; then
    44  		echo -e "\t\"github.com/biogo/biogo/feat\""
    45  	fi
    46  	echo -e "\t\"github.com/biogo/biogo/feat/genome\"\n)\n"
    47  
    48  	# chromosomes
    49  	echo 'var ('
    50  	< $file zcat \
    51  	| grep -v '^#' \
    52  	| grep -v $filter \
    53  	| sed -e 's/\t/ /g' -e 's/chr//' | tr -s ' ' \
    54  	| awk '{print $2,$0}' \
    55  	| sed -e 's/^[XZ]/1.1e10/' -e 's/^[YW]/1.2e10/' -e 's/^M/1.3e10/' -e's/^Un[^ ]\+/2e10/' \
    56  	| sed -e 's/^\([1-9][0-9]*\)[lL]/\1/' -e 's/^\([1-9][0-9]*\)[rR]/\1.5/' \
    57  	| sort -k1,1g -k5rn,5 \
    58  	| sort -k1,1g -k3,3 -u \
    59  	| awk -v prefix=$prefix -v label=$label '{print "\t"label$3" = genome.Chromosome{Chr: \""prefix$3"\", Desc: \"Chromosome\", Length:",$5"}"}'
    60  	echo -e ')\n'
    61  	echo 'var Chromosomes = []*genome.Chromosome{'
    62  	< $file zcat \
    63  	| grep -v '^#' \
    64  	| grep -v $filter \
    65  	| sed -e 's/\t/ /g' -e 's/chr//' | tr -s ' ' \
    66  	| awk '{print $2,$0}' \
    67  	| sed -e 's/^[XZ]/1.1e10/' -e 's/^[YW]/1.2e10/' -e 's/^M/1.3e10/' -e's/^Un[^ ]\+/2e10/' \
    68  	| sed -e 's/^\([1-9][0-9]*\)[lL]/\1/' -e 's/^\([1-9][0-9]*\)[rR]/\1.5/' \
    69  	| sort -k1,1g -k5rn,5 \
    70  	| sort -k1,1g -k3,3 -u \
    71  	| awk  -v label=$label '{print "\t&"label$3","}'
    72  	echo -e '}\n'
    73  
    74  	# fragments
    75  	if [ -z "$nofrags" ]; then
    76  		echo 'var ('
    77  		< $file zcat \
    78  		| grep -v '^#' \
    79  		| grep -v $filter \
    80  		| sed -e 's/\t/ /g' -e 's/chr//' | tr -s ' ' \
    81  		| awk '{print $2,$0}' \
    82  		| sed -e 's/^[XZ]/1.1e10/' -e 's/^[YW]/1.2e10/' -e 's/^M/1.3e10/' -e's/^Un[^ ]\+/2e10/' \
    83  		| sed -e 's/^\([1-9][0-9]*\)[lL]/\1/' -e 's/^\([1-9][0-9]*\)[rR]/\1.5/' \
    84  		| sort -k1,1g -k3,3 \
    85  		| awk -v prefix=$prefix -v label=$label '{print "\t"label$3"_"$8"_"$4" = genome.Fragment{Frag: \""$8"\", Desc: \"Fragment\", Chr: &"label$3", ChrStart:",$4", ChrEnd: "$5", FragStart:",$9", FragEnd: "$10", Type: \x27"$7"\x27, Strand:",$11"1}"}' \
    86  		| sed 's/\.\(.*=\)/_\1/'
    87  		echo -e ')\n'
    88  		echo 'var Fragments = []*genome.Fragment{'
    89  		< $file zcat \
    90  		| grep -v '^#' \
    91  		| grep -v $filter \
    92  		| sed -e 's/\t/ /g' -e 's/chr//' | tr -s ' ' \
    93  		| awk '{print $2,$0}' \
    94  		| sed -e 's/^[XZ]/1.1e10/' -e 's/^[YW]/1.2e10/' -e 's/^M/1.3e10/' -e's/^Un[^ ]\+/2e10/' \
    95  		| sed -e 's/^\([1-9][0-9]*\)[lL]/\1/' -e 's/^\([1-9][0-9]*\)[rR]/\1.5/' \
    96  		| sort -k1,1g -k3,3 \
    97  		| awk -v prefix=$prefix -v label=$label '{print "\t&"label$3"_"$8"_"$46","}' \
    98  		| sed 's/\./_/'
    99  		echo -e '}\n'
   100  
   101  		# init
   102  		cat << 'END'
   103  //line parse.assembly:99
   104  func init() {
   105  	for _, b := range Fragments {
   106  		b.Chr.(*genome.Chromosome).Features = append(b.Chr.(*genome.Chromosome).Features, b)
   107  	}
   108  	for _, c := range Chromosomes {
   109  		fc := make([]feat.Feature, len(c.Features))
   110  		copy(fc, c.Features)
   111  		c.Features = fc
   112  	}
   113  }
   114  END
   115  	fi
   116  ) | gofmt