#!/bin/bash

# parse.assembly converts a gzip-compressed UCSC Assembly table dump into a
# Go source file and writes the gofmt-formatted result to standard output.
#
# This script will only work on the Assembly table.
#
# Usage:
#   parse.assembly <file> <prefix> <species> <package> [filter] [nofrags]
#
#   file     gzip-compressed UCSC assembly table (read with zcat). Required.
#   prefix   prefix used to label the chromosomes (e.g. chr1, chr2 ...).
#            Defaults to "chr" when empty.
#   species  species name, used only in the generated package comment.
#   package  name of the generated Go package.
#   filter   grep regular expression; matching table rows are dropped.
#            Defaults to "^$" (drop nothing but empty lines).
#   nofrags  when non-empty, only chromosomes are emitted: the fragment
#            variables, the Fragments slice and init() are omitted.
#
# To download data tables, see http://genome.ucsc.edu/cgi-bin/hgTables
#
# USE OF THIS SCRIPT WITHOUT A FILTER OR WITH NOFRAGS UNSET
# SHOULD IN MOST CASES BE SEEN AS COMPILER ABUSE.

file=$1
prefix=$2
species=$3
package=$4
filter=$5
nofrags=$6

if [ -z "$file" ]; then
	# Report the problem on stderr and fail, so callers and pipelines can
	# tell that nothing was generated.
	echo "Please specify the UCSC assembly table file" >&2
	exit 1
fi

if [ -z "$prefix" ]; then
	prefix="chr"
fi

if [ -z "$filter" ]; then
	filter="^$"
fi

# Identifier stem for generated Go names: the prefix with its first
# character upper-cased (e.g. "chr" -> "Chr").
label="$(tr '[:lower:]' '[:upper:]' <<< "${prefix:0:1}")${prefix:1}"

# keyed_rows prints the filtered table rows, space separated, each prefixed
# with a sort key derived from the chromosome name. After this stage the
# fields used below are:
#   $1 sort key, $3 chromosome name (first "chr" removed), $4 start, $5 end,
#   $7 type, $8 fragment name, $9 fragment start, $10 fragment end, $11 strand.
# Key rewriting: a leading X/Z becomes 1.1e10, Y/W becomes 1.2e10, M becomes
# 1.3e10 and Un* becomes 2e10, so that these sort after the numbered
# chromosomes; a numbered key with an l/L suffix loses it and an r/R suffix
# is replaced by ".5".
keyed_rows() {
	zcat < "$file" \
		| grep -v '^#' \
		| grep -v -e "$filter" \
		| sed -e 's/\t/ /g' -e 's/chr//' | tr -s ' ' \
		| awk '{print $2,$0}' \
		| sed -e 's/^[XZ]/1.1e10/' -e 's/^[YW]/1.2e10/' -e 's/^M/1.3e10/' -e 's/^Un[^ ]\+/2e10/' \
		| sed -e 's/^\([1-9][0-9]*\)[lL]/\1/' -e 's/^\([1-9][0-9]*\)[rR]/\1.5/'
}

# chromosome_rows prints one keyed row per chromosome. Rows are first ordered
# by key and then by descending end coordinate, and the second sort keeps a
# single row per (key, chromosome name) pair.
# NOTE(review): this is presumably meant to keep the row with the largest end
# coordinate so that $5 is the chromosome length; confirm against sort -u.
chromosome_rows() {
	keyed_rows \
		| sort -k1,1g -k5rn,5 \
		| sort -k1,1g -k3,3 -u
}

(
	echo -e "// DO NOT EDIT. This file was autogenerated by parse.assembly\n"
	echo "// Package $package defines chromosome and assembly fragment intervals for the $package genome assembly for $species."
	echo -e "package $package\n"

	echo "import ("
	# feat is only referenced by the generated init(), which is emitted
	# together with the fragments.
	if [ -z "$nofrags" ]; then
		echo -e "\t\"github.com/biogo/biogo/feat\""
	fi
	echo -e "\t\"github.com/biogo/biogo/feat/genome\"\n)\n"

	# chromosomes
	echo 'var ('
	chromosome_rows \
		| awk -v prefix="$prefix" -v label="$label" '{print "\t"label$3" = genome.Chromosome{Chr: \""prefix$3"\", Desc: \"Chromosome\", Length:",$5"}"}'
	echo -e ')\n'
	echo 'var Chromosomes = []*genome.Chromosome{'
	chromosome_rows \
		| awk -v label="$label" '{print "\t&"label$3","}'
	echo -e '}\n'

	# fragments
	if [ -z "$nofrags" ]; then
		echo 'var ('
		# Each fragment variable is named <label><chr>_<frag>_<start>. The
		# trailing sed replaces the first "." on the left-hand side of the
		# declaration with "_".
		keyed_rows \
			| sort -k1,1g -k3,3 \
			| awk -v prefix="$prefix" -v label="$label" '{print "\t"label$3"_"$8"_"$4" = genome.Fragment{Frag: \""$8"\", Desc: \"Fragment\", Chr: &"label$3", ChrStart:",$4", ChrEnd: "$5", FragStart:",$9", FragEnd: "$10", Type: \x27"$7"\x27, Strand:",$11"1}"}' \
			| sed 's/\.\(.*=\)/_\1/'
		echo -e ')\n'
		echo 'var Fragments = []*genome.Fragment{'
		# The names listed here must match the declarations above, so they
		# are built from the same fields ($3, $8 and $4) and get the same
		# first-"." replacement.
		keyed_rows \
			| sort -k1,1g -k3,3 \
			| awk -v label="$label" '{print "\t&"label$3"_"$8"_"$4","}' \
			| sed 's/\./_/'
		echo -e '}\n'

		# init
		# The quoted heredoc is emitted verbatim into the generated file.
		# NOTE(review): the //line directive is kept exactly as it was; its
		# line number may not match the current layout of this script.
		cat << 'END'
//line parse.assembly:99
func init() {
	for _, b := range Fragments {
		b.Chr.(*genome.Chromosome).Features = append(b.Chr.(*genome.Chromosome).Features, b)
	}
	for _, c := range Chromosomes {
		fc := make([]feat.Feature, len(c.Features))
		copy(fc, c.Features)
		c.Features = fc
	}
}
END
	fi
) | gofmt