#!/bin/bash

# This script parses a UCSC Chromosome Band table into a Go source code file,
# which is written to standard output after being piped through gofmt.
#
# Usage: parse.karyotype <file> <prefix> <species> <package>
#
#   file     the UCSC karyotype table; it is read with zcat, so it is expected
#            to be compressed
#   prefix   used to label the chromosomes (e.g. chr1, chr2 ...); when empty,
#            "chr" is used
#   species  named in the generated package comment
#   package  names the generated package (and the assembly in its comment)
#
# This script will only work on the Chromosome Band table - it will not
# work on an 'Assembly' or 'Scaffold' table.
#
# To download data tables, see http://genome.ucsc.edu/cgi-bin/hgTables

file=$1
prefix=$2
species=$3
package=$4

# Diagnostics go to stderr and exit non-zero: stdout carries the generated Go
# source, so a message printed there would end up in the output file.
if [ -z "$file" ]; then
    echo "Please specify the UCSC karyotype table file" >&2
    exit 1
fi

if [ ! -r "$file" ]; then
    echo "Cannot read the UCSC karyotype table file: $file" >&2
    exit 1
fi

# Without a package name the generated "package" clause is empty and gofmt
# rejects the output, so fail early with a clearer message.
if [ -z "$package" ]; then
    echo "Please specify the package name for the generated Go file" >&2
    exit 1
fi

if [ -z "$prefix" ]; then
    prefix="chr"
fi

# label is the prefix with its first character upper-cased; it starts every
# generated Go identifier.
label="$(tr '[:lower:]' '[:upper:]' <<< "${prefix:0:1}")${prefix:1}"

# keyed_rows prints every non-comment row of the table as space-separated
# fields, with the first "chr" removed and a sort key prepended as field 1.
# The key is the chromosome name rewritten so that `sort -g` orders it:
# X/Z, Y/W and M become 1.1e10, 1.2e10 and 1.3e10 (after the numbered
# chromosomes), and a trailing arm letter on a numbered chromosome is folded
# into the number (e.g. 2L -> 2, 2R -> 2.5).
# Resulting fields: 1 key, 2 chromosome, 3 start, 4 end, 5 band, 6 stain.
keyed_rows() {
    zcat < "$file" \
        | grep -v '^#' \
        | sed -e 's/\t/ /g' -e 's/chr//' | tr -s ' ' \
        | awk '{print $1,$0}' \
        | sed -e 's/^[XZ]/1.1e10/' -e 's/^[YW]/1.2e10/' -e 's/^M/1.3e10/' \
        | sed -e 's/^\([1-9][0-9]*\)[lL]/\1/' -e 's/^\([1-9][0-9]*\)[rR]/\1.5/'
}

# chromosome_rows prints one row per sort key: rows are ordered by key and by
# descending start position, then reduced to a single row per key, so that
# field 4 of the surviving row can be used as the chromosome length.
chromosome_rows() {
    keyed_rows \
        | sort -k1,1g -k3rn,3 \
        | sort -u -k1g,1
}

# band_rows prints every row ordered by key and then by ascending start
# position.
band_rows() {
    keyed_rows \
        | sort -k1,1g -k3n,3
}

(
    echo -e "// DO NOT EDIT. This file was autogenerated by parse.karyotype\n"
    echo "// Package $package defines chromosome and band intervals for the $species karyotype based on the $package assembly."
    echo -e "package $package\n"

    echo -e "import (\n\t\"github.com/biogo/biogo/feat\"\n\t\"github.com/biogo/biogo/feat/genome\"\n)\n"

    # chromosomes
    echo 'var ('
    chromosome_rows \
        | awk -v prefix="$prefix" -v label="$label" '{print "\t"label$2" = genome.Chromosome{Chr: \""prefix$2"\", Desc: \"Chromosome\", Length:",$4"}"}'
    echo -e ')\n'
    echo 'var Chromosomes = []*genome.Chromosome{'
    chromosome_rows \
        | awk -v label="$label" '{print "\t&"label$2","}'
    echo -e '}\n'

    # bands
    # The trailing sed turns the first "." of each identifier into "_",
    # since "." cannot appear in a Go identifier.
    echo 'var ('
    band_rows \
        | awk -v label="$label" '{print "\t"label$2"_"$5" = genome.Band{Band: \""$5"\", Desc: \"Band\", StartPos:",$3", EndPos:",$4", Giemsa: \""$6"\", Chr: &"label$2"}"}' \
        | sed 's/\.\(.*=\)/_\1/'
    echo -e ')\n'
    echo 'var Bands = []*genome.Band{'
    band_rows \
        | awk -v label="$label" '{print "\t&"label$2"_"$5","}' \
        | sed 's/\./_/'
    echo '}'

    # init
    # The //line directive below is emitted verbatim; its number is not kept
    # in sync with the line count of this script.
    cat << 'END'
//line parse.karyotype:82
func init() {
	for _, b := range Bands {
		b.Chr.(*genome.Chromosome).Features = append(b.Chr.(*genome.Chromosome).Features, b)
	}
	for _, c := range Chromosomes {
		bc := make([]feat.Feature, len(c.Features))
		copy(bc, c.Features)
		c.Features = bc
	}
}
END
) | gofmt