-- pycrate/pycrate_asn1dir/NCBI_201702/seqalign.asn
-- 296 lines, 8.8 KiB, Groff

--$Revision: 370567 $
--**********************************************************************
--
-- NCBI Sequence Alignment elements
-- by James Ostell, 1990
--
--**********************************************************************
NCBI-Seqalign DEFINITIONS ::=
BEGIN
EXPORTS Seq-align, Score, Score-set, Seq-align-set;
IMPORTS Seq-id, Seq-loc , Na-strand FROM NCBI-Seqloc
User-object, Object-id FROM NCBI-General;
--*** Sequence Alignment ********************************
--*
-- Seq-align-set: an unordered collection (SET OF) of Seq-align values.
-- It is exported from this module and is also used as the "disc"
-- alternative of Seq-align.segs.
Seq-align-set ::= SET OF Seq-align
-- Seq-align: one alignment record.
-- Required members: type, segs.
-- Optional members: dim, score, bounds, id, ext.
Seq-align ::= SEQUENCE {
    -- kind of alignment
    type ENUMERATED {
        not-set (0),
        global  (1),
        diags   (2),       -- unbroken, but not ordered, diagonals
        partial (3),       -- mapping pieces together
        disc    (4),       -- discontinuous alignment
        other   (255)
    },

    -- dimensionality
    dim INTEGER OPTIONAL,

    -- scores that apply to the whole alignment
    score SET OF Score OPTIONAL,

    -- alignment data, in exactly one of the representations below
    segs CHOICE {
        dendiag SEQUENCE OF Dense-diag,
        denseg  Dense-seg,
        std     SEQUENCE OF Std-seg,
        packed  Packed-seg,
        disc    Seq-align-set,
        spliced Spliced-seg,
        sparse  Sparse-seg
    },

    -- regions of sequence over which the alignment was computed
    bounds SET OF Seq-loc OPTIONAL,

    -- alignment id
    id SEQUENCE OF Object-id OPTIONAL,

    -- extra info
    ext SEQUENCE OF User-object OPTIONAL
}
-- Dense-diag: for (multiway) diagonals.
-- A single aligned length (len) is shared by all sequences listed in ids.
Dense-diag ::= SEQUENCE {
    -- dimensionality; 2 when omitted
    dim INTEGER DEFAULT 2,
    -- sequences in order
    ids SEQUENCE OF Seq-id,
    -- start OFFSETS in ids order
    starts SEQUENCE OF INTEGER,
    -- len of aligned segments
    len INTEGER,
    strands SEQUENCE OF Na-strand OPTIONAL,
    scores SET OF Score OPTIONAL
}
-- Dense-seg: the densest packing, for sequence alignments only.
-- For (multiway) global or partial alignments.
-- A start of -1 indicates a gap for that sequence, with the gap
-- length taken from lens.
--
-- Worked example:
--
--   id=100  AAGGCCTTTTAGAGATGATGATGATGATGA
--   id=200  AAGGCCTTTTAG.......GATGATGATGA
--   id=300  ....CCTTTTAGAGATGATGAT....ATGA
--
--   dim = 3, numseg = 6, ids = { 100, 200, 300 }
--   starts = { 0,0,-1, 4,4,0, 12,-1,8, 19,12,15, 22,15,-1, 26,19,18 }
--   lens = { 4, 8, 7, 3, 4, 4 }
--
-- In the example, starts holds dim entries per segment (in ids order)
-- and lens holds one entry per segment.
Dense-seg ::= SEQUENCE {
    -- dimensionality; 2 when omitted
    dim INTEGER DEFAULT 2,
    -- number of segments here
    numseg INTEGER,
    -- sequences in order
    ids SEQUENCE OF Seq-id,
    -- start OFFSETS in ids order within segs
    starts SEQUENCE OF INTEGER,
    -- lengths in ids order within segs
    lens SEQUENCE OF INTEGER,
    strands SEQUENCE OF Na-strand OPTIONAL,
    -- score for each seg
    scores SEQUENCE OF Score OPTIONAL
}
-- Packed-seg: for (multiway) global or partial alignments.
-- Presence of each sequence in each segment is recorded in the
-- "present" octet string rather than through the start offsets.
Packed-seg ::= SEQUENCE {
    -- dimensionality; 2 when omitted
    dim INTEGER DEFAULT 2,
    -- number of segments here
    numseg INTEGER,
    -- sequences in order
    ids SEQUENCE OF Seq-id,
    -- start OFFSETS in ids order for whole alignment
    starts SEQUENCE OF INTEGER,
    -- Boolean if each sequence present or absent in each segment
    present OCTET STRING,
    -- length of each segment
    lens SEQUENCE OF INTEGER,
    strands SEQUENCE OF Na-strand OPTIONAL,
    -- score for each segment
    scores SEQUENCE OF Score OPTIONAL
}
-- Std-seg: a segment described by a list of Seq-loc values.
-- Only loc is required; ids and scores are optional.
Std-seg ::= SEQUENCE {
    -- dimensionality; 2 when omitted
    dim INTEGER DEFAULT 2,
    ids SEQUENCE OF Seq-id OPTIONAL,
    loc SEQUENCE OF Seq-loc,
    scores SET OF Score OPTIONAL
}
-- Spliced-seg: alignment of a product (protein or transcript) to a
-- genomic sequence, expressed as an ordered list of exons.
Spliced-seg ::= SEQUENCE {
    -- product is either protein or transcript (cDNA)
    product-id Seq-id OPTIONAL,
    genomic-id Seq-id OPTIONAL,

    -- strands; each should be 'plus' or 'minus'
    product-strand Na-strand OPTIONAL,
    genomic-strand Na-strand OPTIONAL,

    product-type ENUMERATED {
        transcript (0),
        protein    (1)
    },

    -- set of segments involved:
    --   each segment corresponds to one exon
    --   exons are always in biological order
    exons SEQUENCE OF Spliced-exon,

    -- start of poly(A) tail on the transcript
    -- For sense transcripts:
    --   aligned product positions < poly-a <= product-length
    --   poly-a == product-length indicates inferred poly(A) tail
    --   at transcript's end
    -- For antisense transcripts:
    --   -1 <= poly-a < aligned product positions
    --   poly-a == -1 indicates inferred poly(A) tail at transcript's start
    poly-a INTEGER OPTIONAL,

    -- length of the product, in bases/residues;
    -- from this (or from poly-a if present), a 3' unaligned length
    -- can be extracted
    product-length INTEGER OPTIONAL,

    -- alignment descriptors / modifiers;
    -- this provides us a set for extension
    modifiers SET OF Spliced-seg-modifier OPTIONAL
}
-- Spliced-seg-modifier: one boolean flag describing a Spliced-seg.
-- Exactly one alternative is present per value.
Spliced-seg-modifier ::= CHOICE {
    -- protein aligns from the start and the first codon
    -- on both product and genomic is a start codon
    start-codon-found BOOLEAN,

    -- protein aligns to its end and there is a stop codon
    -- on the genomic right after the alignment
    stop-codon-found BOOLEAN
}
-- Spliced-exon: a complete or partial exon.
-- Two consecutive Spliced-exons may belong to one exon.
Spliced-exon ::= SEQUENCE {
    -- product range; product-end >= product-start
    product-start Product-pos,
    product-end   Product-pos,

    -- genomic range; genomic-end >= genomic-start
    genomic-start INTEGER,
    genomic-end   INTEGER,

    -- product is either protein or transcript (cDNA)
    product-id Seq-id OPTIONAL,
    genomic-id Seq-id OPTIONAL,

    -- should be 'plus' or 'minus'
    product-strand Na-strand OPTIONAL,

    -- genomic-strand represents the strand of translation
    genomic-strand Na-strand OPTIONAL,

    -- basic segments; always in biologic order
    parts SEQUENCE OF Spliced-exon-chunk OPTIONAL,

    -- scores for this exon
    scores Score-set OPTIONAL,

    -- splice sites
    acceptor-before-exon Splice-site OPTIONAL,
    donor-after-exon     Splice-site OPTIONAL,

    -- flag: is this exon complete or partial?
    partial BOOLEAN OPTIONAL,

    -- extra info
    ext SEQUENCE OF User-object OPTIONAL
}
-- Product-pos: a position on the product, given either as a plain
-- integer (nucpos) or as a Prot-pos (protpos).
Product-pos ::= CHOICE {
    nucpos  INTEGER,
    protpos Prot-pos
}
-- Prot-pos: position on protein (1/3 of amino-acid resolution).
Prot-pos ::= SEQUENCE {
    -- amino-acid position (0-based)
    amin INTEGER,

    -- position within codon (1-based);
    -- 0 = not set (meaning 1), which is also the value when omitted
    frame INTEGER DEFAULT 0
}
-- Spliced-exon-chunk: piece of an exon.
-- Every alternative carries a length; lengths are given in nucleotide
-- bases (1/3 of aminoacid when product is a protein).
Spliced-exon-chunk ::= CHOICE {
    -- both sequences represented, product and genomic sequences match
    match INTEGER,

    -- both sequences represented, product and genomic sequences
    -- do not match
    mismatch INTEGER,

    -- both sequences are represented, there is sufficient similarity
    -- between product and genomic sequences. Can be used to replace
    -- stretches of matches and mismatches, mostly for protein to genomic
    -- where definition of match or mismatch depends on translation table
    diag INTEGER,

    -- insertion in product sequence (i.e. gap in the genomic sequence)
    product-ins INTEGER,

    -- insertion in genomic sequence (i.e. gap in the product sequence)
    genomic-ins INTEGER
}
-- Splice-site: site involved in splice.
Splice-site ::= SEQUENCE {
    -- typically two bases in the intronic region,
    -- always in IUPAC format
    bases VisibleString
}
-- ==========================================================================
--
-- Sparse-seg follows the semantics of dense-seg and is more optimal for
-- representing sparse multiple alignments.
-- Only rows is required; every other member is optional.
--
-- ==========================================================================
Sparse-seg ::= SEQUENCE {
    master-id Seq-id OPTIONAL,

    -- pairwise alignments constituting this multiple alignment
    rows SET OF Sparse-align,

    -- per-row scores
    row-scores SET OF Score OPTIONAL,

    -- index of extra items
    ext SET OF Sparse-seg-ext OPTIONAL
}
-- Sparse-align: one pairwise alignment between first-id and second-id.
-- The three per-segment lists are annotated [numseg] below, i.e. each is
-- expected to hold numseg entries.
Sparse-align ::= SEQUENCE {
    first-id  Seq-id,
    second-id Seq-id,

    -- number of segments
    numseg INTEGER,

    -- starts on the first sequence [numseg]
    first-starts SEQUENCE OF INTEGER,

    -- starts on the second sequence [numseg]
    second-starts SEQUENCE OF INTEGER,

    -- lengths of segments [numseg]
    lens SEQUENCE OF INTEGER,

    second-strands SEQUENCE OF Na-strand OPTIONAL,

    -- per-segment scores
    seg-scores SET OF Score OPTIONAL
}
-- Sparse-seg-ext: extension record for Sparse-seg. Only index is active;
-- the richer layout below is commented out in the specification and is
-- kept here for reference.
Sparse-seg-ext ::= SEQUENCE {
    --seg-ext SET OF {
    --    index INTEGER,
    --    data User-field
    --    }
    index INTEGER
}
-- Score: an optionally identified value that is either REAL or INTEGER.
-- Use of Score is discouraged for external ASN.1 specifications.
Score ::= SEQUENCE {
    id Object-id OPTIONAL,
    value CHOICE {
        real REAL,
        int  INTEGER
    }
}
-- Score-set: an unordered collection (SET OF) of Score values.
-- use of Score-set is encouraged for external ASN.1 specifications
Score-set ::= SET OF Score
END