pycrate/pycrate_asn1dir/NCBI_201702/seq.asn

485 lines
19 KiB
Groff

--$Revision: 487074 $
--**********************************************************************
--
-- NCBI Sequence elements
-- by James Ostell, 1990
-- Version 3.0 - June 1994
--
--**********************************************************************
NCBI-Sequence DEFINITIONS ::=
BEGIN
-- Symbols exported to other modules; each symbol is listed exactly once.
EXPORTS Annotdesc, Annot-descr, Bioseq, GIBB-mol, Heterogen, MolInfo,
        Numbering, Pubdesc, Seq-annot, Seq-data, Seqdesc, Seq-descr, Seq-ext,
        Seq-hist, Seq-inst, Seq-literal, Delta-ext, Seq-gap;
IMPORTS Date, Int-fuzz, Dbtag, Object-id, User-object FROM NCBI-General
Seq-align FROM NCBI-Seqalign
Seq-feat, ModelEvidenceSupport FROM NCBI-Seqfeat
Seq-graph FROM NCBI-Seqres
Pub-equiv FROM NCBI-Pub
Org-ref FROM NCBI-Organism
BioSource FROM NCBI-BioSource
Seq-id, Seq-loc FROM NCBI-Seqloc
GB-block FROM GenBank-General
PIR-block FROM PIR-General
EMBL-block FROM EMBL-General
SP-block FROM SP-General
PRF-block FROM PRF-General
PDB-block FROM PDB-General
Seq-table FROM NCBI-SeqTable;
-- pycrate patch: StringStore is defined locally, using the definition
-- taken from general.asn (it is not part of this module's IMPORTS list).
StringStore ::=
    [APPLICATION 1] IMPLICIT OCTET STRING
--*** Sequence ********************************
--*
-- Bioseq: a single sequence record made of its identifiers, optional
-- descriptors, the sequence instance and optional annotations.
Bioseq ::= SEQUENCE {
    id      SET OF Seq-id ,                 -- identifiers that are all equivalent
    descr   Seq-descr           OPTIONAL ,  -- descriptors
    inst    Seq-inst ,                      -- the sequence data
    annot   SET OF Seq-annot    OPTIONAL
}
--*** Descriptors *****************************
--*
-- Seq-descr: an unordered collection of Seqdesc descriptors.
Seq-descr ::=
    SET OF Seqdesc
-- Seqdesc: one descriptor of a sequence; a value carries exactly one of
-- the alternatives listed below.
Seqdesc ::= CHOICE {
    mol-type    GIBB-mol ,              -- molecule type
    modif       SET OF GIBB-mod ,       -- set of modifiers
    method      GIBB-method ,           -- method used for sequencing
    name        VisibleString ,         -- name of the sequence
    title       VisibleString ,         -- title of the sequence
    org         Org-ref ,               -- organism, if everything is from one
    comment     VisibleString ,         -- longer free-text comment
    num         Numbering ,             -- numbering system
    maploc      Dbtag ,                 -- map location of the sequence
    pir         PIR-block ,             -- PIR-specific information
    genbank     GB-block ,              -- GenBank-specific information
    pub         Pubdesc ,               -- reference to a publication
    region      VisibleString ,         -- overall region (globin locus)
    user        User-object ,           -- user-defined object
    sp          SP-block ,              -- SWISSPROT-specific information
    dbxref      Dbtag ,                 -- cross-reference to other databases
    embl        EMBL-block ,            -- EMBL-specific information
    create-date Date ,                  -- date the entry was first created/released
    update-date Date ,                  -- date of the last update
    prf         PRF-block ,             -- PRF-specific information
    pdb         PDB-block ,             -- PDB-specific information
    het         Heterogen ,             -- cofactor etc., associated but not bound
    source      BioSource ,             -- source of materials (includes Org-ref)
    molinfo     MolInfo ,               -- molecule and technique information
    modelev     ModelEvidenceSupport    -- model evidence for XM records
}
--******* NOTE:
--* mol-type, modif, method, and org are consolidated and expanded
--* in Org-ref, BioSource, and MolInfo in this specification. They
--* will be removed in later specifications. Do not use them in
--* the future. Instead expect the new structures.
--*
--***************************
--********************************************************************
--
-- MolInfo gives information on the
-- classification of the type and quality of the sequence
--
-- WARNING: this will replace GIBB-mol, GIBB-mod, GIBB-method
--
--********************************************************************
-- MolInfo: classification of the molecule type (biomol), the technique
-- that produced the sequence (tech) and how complete the sequence is.
MolInfo ::= SEQUENCE {
    -- kind of biological molecule
    biomol INTEGER {
        unknown             (0) ,
        genomic             (1) ,
        pre-RNA             (2) ,   -- any sort of precursor RNA
        mRNA                (3) ,
        rRNA                (4) ,
        tRNA                (5) ,
        snRNA               (6) ,
        scRNA               (7) ,
        peptide             (8) ,
        other-genetic       (9) ,   -- other genetic material
        genomic-mRNA        (10) ,  -- reported mix of genomic and cdna sequence
        cRNA                (11) ,  -- viral RNA genome copy intermediate
        snoRNA              (12) ,  -- small nucleolar RNA
        transcribed-RNA     (13) ,  -- transcribed RNA outside the existing classes
        ncRNA               (14) ,
        tmRNA               (15) ,
        other               (255)
    } DEFAULT unknown ,

    -- technique used to obtain the sequence
    tech INTEGER {
        unknown             (0) ,
        standard            (1) ,   -- standard sequencing
        est                 (2) ,   -- Expressed Sequence Tag
        sts                 (3) ,   -- Sequence Tagged Site
        survey              (4) ,   -- one-pass genomic sequence
        genemap             (5) ,   -- from genetic mapping techniques
        physmap             (6) ,   -- from physical mapping techniques
        derived             (7) ,   -- derived from other data, not a primary entity
        concept-trans       (8) ,   -- conceptual translation
        seq-pept            (9) ,   -- the peptide itself was sequenced
        both                (10) ,  -- conceptual translation with partial peptide sequence
        seq-pept-overlap    (11) ,  -- sequenced peptide, ordered by overlap
        seq-pept-homol      (12) ,  -- sequenced peptide, ordered by homology
        concept-trans-a     (13) ,  -- conceptual translation supplied by the author
        htgs-1              (14) ,  -- unordered High Throughput sequence contig
        htgs-2              (15) ,  -- ordered High Throughput sequence contig
        htgs-3              (16) ,  -- finished High Throughput sequence
        fli-cdna            (17) ,  -- full length insert cDNA
        htgs-0              (18) ,  -- single genomic reads for coordination
        htc                 (19) ,  -- high throughput cDNA
        wgs                 (20) ,  -- whole genome shotgun sequencing
        barcode             (21) ,  -- barcode of life project
        composite-wgs-htgs  (22) ,  -- composite of WGS and HTGS
        tsa                 (23) ,  -- transcriptome shotgun assembly
        targeted            (24) ,  -- targeted locus sets/studies
        other               (255)   -- give the explanation in techexp
    } DEFAULT unknown ,

    techexp VisibleString OPTIONAL ,    -- explanation when tech is not enough

    -- Most records do not indicate completeness.  Genome sequences should
    -- be assumed incomplete unless they are specifically marked complete.
    -- For mRNAs the ends should be assumed not exactly known unless the
    -- record is marked as having the left or the right end.
    completeness INTEGER {
        unknown     (0) ,
        complete    (1) ,   -- complete biological entity
        partial     (2) ,   -- partial, no details given
        no-left     (3) ,   -- missing 5' or NH3 end
        no-right    (4) ,   -- missing 3' or COOH end
        no-ends     (5) ,   -- both ends missing
        has-left    (6) ,   -- 5' or NH3 end present
        has-right   (7) ,   -- 3' or COOH end present
        other       (255)
    } DEFAULT unknown ,

    gbmoltype VisibleString OPTIONAL    -- identifies particular ncRNA
}
-- GIBB-mol: type of molecule represented.  Legacy type that MolInfo is
-- intended to replace.
GIBB-mol ::= ENUMERATED {
    unknown         (0) ,
    genomic         (1) ,
    pre-mRNA        (2) ,   -- any sort of precursor RNA
    mRNA            (3) ,
    rRNA            (4) ,
    tRNA            (5) ,
    snRNA           (6) ,
    scRNA           (7) ,
    peptide         (8) ,
    other-genetic   (9) ,   -- other genetic material
    genomic-mRNA    (10) ,  -- reported mix of genomic and cdna sequence
    other           (255)
}
-- GIBB-mod: GenInfo Backbone modifiers.  Legacy type that MolInfo is
-- intended to replace.
GIBB-mod ::= ENUMERATED {
    dna             (0) ,
    rna             (1) ,
    extrachrom      (2) ,
    plasmid         (3) ,
    mitochondrial   (4) ,
    chloroplast     (5) ,
    kinetoplast     (6) ,
    cyanelle        (7) ,
    synthetic       (8) ,
    recombinant     (9) ,
    partial         (10) ,
    complete        (11) ,
    mutagen         (12) ,  -- subject of mutagenesis ?
    natmut          (13) ,  -- natural mutant ?
    transposon      (14) ,
    insertion-seq   (15) ,
    no-left         (16) ,  -- left end missing (5' for na, NH2 for aa)
    no-right        (17) ,  -- right end missing (3' or COOH)
    macronuclear    (18) ,
    proviral        (19) ,
    est             (20) ,  -- expressed sequence tag
    sts             (21) ,  -- sequence tagged site
    survey          (22) ,  -- one pass survey sequence
    chromoplast     (23) ,
    genemap         (24) ,  -- a genetic map
    restmap         (25) ,  -- an ordered restriction map
    physmap         (26) ,  -- a physical map (not an ordered restriction map)
    other           (255)
}
-- GIBB-method: sequencing methods.  Legacy type that MolInfo is intended
-- to replace.
GIBB-method ::= ENUMERATED {
    concept-trans       (1) ,   -- conceptual translation
    seq-pept            (2) ,   -- the peptide itself was sequenced
    both                (3) ,   -- conceptual translation with partial peptide sequence
    seq-pept-overlap    (4) ,   -- sequenced peptide, ordered by overlap
    seq-pept-homol      (5) ,   -- sequenced peptide, ordered by homology
    concept-trans-a     (6) ,   -- conceptual translation supplied by the author
    other               (255)
}
-- Numbering: any numbering system used for display; one of the four
-- schemes below.
Numbering ::= CHOICE {
    cont    Num-cont ,  -- continuous numbering
    enum    Num-enum ,  -- enumerated names for residues
    ref     Num-ref ,   -- numbering by reference to another sequence
    real    Num-real    -- mapping to a float system
}
-- Num-cont: continuous display numbering system.
Num-cont ::= SEQUENCE {
    refnum      INTEGER DEFAULT 1 ,     -- number given to the first residue
    has-zero    BOOLEAN DEFAULT FALSE , -- is 0 used?
    ascending   BOOLEAN DEFAULT TRUE    -- do the numbers ascend?
}
-- Num-enum: arbitrary tags attached to residues.
Num-enum ::= SEQUENCE {
    num     INTEGER ,                   -- count of tags that follow
    names   SEQUENCE OF VisibleString   -- the tags themselves
}
-- Num-ref: numbering by reference to other sequences.
Num-ref ::= SEQUENCE {
    -- kind of reference
    type ENUMERATED {
        not-set (0) ,
        sources (1) ,   -- by segmented or const seq sources
        aligns  (2)     -- by the alignments given in the aligns field
    } ,
    aligns Seq-align OPTIONAL
}
-- Num-real: mapping from the integer system used by Bioseq to a floating
-- point system, where position = (a * int_position) + b.
Num-real ::= SEQUENCE {
    a       REAL ,                  -- multiplier applied to the integer position
    b       REAL ,                  -- offset added after the multiplication
    units   VisibleString OPTIONAL
}
-- Pubdesc: how a sequence was presented in a publication.
Pubdesc ::= SEQUENCE {
    pub         Pub-equiv ,                 -- the citation(s)
    name        VisibleString   OPTIONAL ,  -- name used in the paper
    fig         VisibleString   OPTIONAL ,  -- figure in the paper
    num         Numbering       OPTIONAL ,  -- numbering taken from the paper
    numexc      BOOLEAN         OPTIONAL ,  -- numbering problem with the paper
    poly-a      BOOLEAN         OPTIONAL ,  -- is a poly A tail indicated in the figure?
    maploc      VisibleString   OPTIONAL ,  -- map location reported in the paper
    seq-raw     StringStore     OPTIONAL ,  -- original sequence from the paper
    align-group INTEGER         OPTIONAL ,  -- this seq is aligned with others in the paper
    comment     VisibleString   OPTIONAL ,  -- any comment on this pub in context
    -- type of reference in a GenBank record
    reftype INTEGER {
        seq         (0) ,   -- refers to the sequence
        sites       (1) ,   -- refers to unspecified features
        feats       (2) ,   -- refers to specified features
        no-target   (3)     -- nothing specified (EMBL)
    } DEFAULT seq
}
-- Heterogen: a cofactor, prosthetic group, inhibitor, etc.
Heterogen ::=
    VisibleString
--*** Instances of sequences *******************************
--*
-- Seq-inst: the sequence data itself, together with its representation
-- class, molecule class, length, topology and strandedness.
Seq-inst ::= SEQUENCE {
    -- representation class
    repr ENUMERATED {
        not-set (0) ,   -- empty
        virtual (1) ,   -- no seq data
        raw     (2) ,   -- continuous sequence
        seg     (3) ,   -- segmented sequence
        const   (4) ,   -- constructed sequence
        ref     (5) ,   -- reference to another sequence
        consen  (6) ,   -- consensus sequence or pattern
        map     (7) ,   -- ordered map of any kind
        delta   (8) ,   -- sequence made by changes (delta) to others
        other   (255)
    } ,

    -- molecule class in the living organism (original note: cdna = rna)
    mol ENUMERATED {
        not-set (0) ,
        dna     (1) ,
        rna     (2) ,
        aa      (3) ,
        na      (4) ,   -- just a nucleic acid
        other   (255)
    } ,

    length  INTEGER     OPTIONAL ,  -- sequence length in residues
    fuzz    Int-fuzz    OPTIONAL ,  -- uncertainty of the length

    -- topology of the molecule
    topology ENUMERATED {
        not-set     (0) ,
        linear      (1) ,
        circular    (2) ,
        tandem      (3) ,   -- some part of a tandem repeat
        other       (255)
    } DEFAULT linear ,

    -- strandedness in the living organism; when absent the default is
    -- ds for DNA and ss for RNA, pept
    strand ENUMERATED {
        not-set (0) ,
        ss      (1) ,   -- single strand
        ds      (2) ,   -- double strand
        mixed   (3) ,
        other   (255)
    } OPTIONAL ,

    seq-data    Seq-data    OPTIONAL ,  -- the sequence
    ext         Seq-ext     OPTIONAL ,  -- extensions for special types
    hist        Seq-hist    OPTIONAL    -- sequence history
}
--*** Sequence Extensions **********************************
--* for representing more complex types
--* const type uses Seq-hist.assembly
-- Seq-ext: extension data for the more complex sequence representations.
Seq-ext ::= CHOICE {
    seg     Seg-ext ,   -- segmented sequences
    ref     Ref-ext ,   -- hot link to another sequence (a view)
    map     Map-ext ,   -- ordered map of markers
    delta   Delta-ext
}
-- Payload types for the four Seq-ext alternatives.
Seg-ext   ::= SEQUENCE OF Seq-loc       -- ordered list of locations
Ref-ext   ::= Seq-loc                   -- a single location
Map-ext   ::= SEQUENCE OF Seq-feat      -- ordered list of features
Delta-ext ::= SEQUENCE OF Delta-seq     -- ordered list of Delta-seq pieces
-- Delta-seq: one element of a Delta-ext, given either as a location or
-- as a literal piece of sequence.
Delta-seq ::= CHOICE {
    loc     Seq-loc ,       -- points to a sequence
    literal Seq-literal     -- a piece of sequence
}
-- Seq-literal: a piece of sequence with a mandatory length and optional
-- data.
Seq-literal ::= SEQUENCE {
    length      INTEGER ,               -- length in residues, always required
    fuzz        Int-fuzz    OPTIONAL ,  -- the length could be unsure
    seq-data    Seq-data    OPTIONAL    -- the data, when available
}
--*** Sequence History Record ***********************************
--** assembly = records how seq was assembled from others
--** replaces = records sequences made obsolete by this one
--** replaced-by = this seq is made obsolete by another(s)
-- Seq-hist: history record of a sequence (assembly, replacement and
-- deletion information).
Seq-hist ::= SEQUENCE {
    assembly    SET OF Seq-align    OPTIONAL ,  -- how this seq was assembled
    replaces    Seq-hist-rec        OPTIONAL ,  -- seqs made obsolete by this one
    replaced-by Seq-hist-rec        OPTIONAL ,  -- seqs that make this one obsolete
    deleted CHOICE {
        bool    BOOLEAN ,
        date    Date
    } OPTIONAL
}
-- Seq-hist-rec: a set of sequence ids with an optional date, used by the
-- replaces and replaced-by fields of Seq-hist.
Seq-hist-rec ::= SEQUENCE {
    date    Date OPTIONAL ,
    ids     SET OF Seq-id
}
--*** Various internal sequence representations ************
--* all are controlled, fixed length forms
-- Seq-data: the available sequence representations; a value uses exactly
-- one of them.
Seq-data ::= CHOICE {
    iupacna     IUPACna ,   -- IUPAC 1 letter nucleic acid code
    iupacaa     IUPACaa ,   -- IUPAC 1 letter amino acid code
    ncbi2na     NCBI2na ,   -- 2 bit nucleic acid code
    ncbi4na     NCBI4na ,   -- 4 bit nucleic acid code
    ncbi8na     NCBI8na ,   -- 8 bit extended nucleic acid code
    ncbipna     NCBIpna ,   -- nucleic acid probabilities
    ncbi8aa     NCBI8aa ,   -- 8 bit extended amino acid codes
    ncbieaa     NCBIeaa ,   -- extended ASCII 1 letter aa codes
    ncbipaa     NCBIpaa ,   -- amino acid probabilities
    ncbistdaa   NCBIstdaa , -- consecutive codes for std aas
    gap         Seq-gap     -- gap types
}
-- Seq-gap: describes a gap by its type, with optional linkage state and
-- linkage evidence.
Seq-gap ::= SEQUENCE {
    type INTEGER {
        unknown         (0) ,
        fragment        (1) ,   -- Deprecated. Used only for AGP 1.1
        clone           (2) ,   -- Deprecated. Used only for AGP 1.1
        short-arm       (3) ,
        heterochromatin (4) ,
        centromere      (5) ,
        telomere        (6) ,
        repeat          (7) ,
        contig          (8) ,
        scaffold        (9) ,
        other           (255)
    } ,
    linkage INTEGER {
        unlinked    (0) ,
        linked      (1) ,
        other       (255)
    } OPTIONAL ,
    linkage-evidence SET OF Linkage-evidence OPTIONAL
}
-- Linkage-evidence: a single evidence type; Seq-gap carries a set of
-- these in its linkage-evidence field.
Linkage-evidence ::= SEQUENCE {
    type INTEGER {
        paired-ends     (0) ,
        align-genus     (1) ,
        align-xgenus    (2) ,
        align-trnscpt   (3) ,
        within-clone    (4) ,
        clone-contig    (5) ,
        map             (6) ,
        strobe          (7) ,
        unspecified     (8) ,
        pcr             (9) ,
        other           (255)
    }
}
-- Concrete encodings referenced by Seq-data.

-- IUPAC 1 letter codes, no spaces
IUPACna ::= StringStore

-- IUPAC 1 letter codes, no spaces
IUPACaa ::= StringStore

-- 2 bits per base: 00=A, 01=C, 10=G, 11=T
NCBI2na ::= OCTET STRING

-- 4 bits per base, 1 bit each for agct:
--   0001=A, 0010=C, 0100=G, 1000=T/U
--   0101=Purine, 1010=Pyrimidine, etc
NCBI4na ::= OCTET STRING

-- for modified nucleic acids
NCBI8na ::= OCTET STRING

-- 5 octets/base, prob for a,c,g,t,n
-- probabilities are coded 0-255 = 0.0-1.0
NCBIpna ::= OCTET STRING

-- for modified amino acids
NCBI8aa ::= OCTET STRING

-- ASCII extended 1 letter aa codes
-- IUPAC codes + U=selenocysteine
NCBIeaa ::= StringStore

-- 25 octets/aa, prob for IUPAC aas in order:
--   A-Y,B,Z,X,(ter),anything
-- probabilities are coded 0-255 = 0.0-1.0
NCBIpaa ::= OCTET STRING

-- codes 0-25, 1 per byte
NCBIstdaa ::= OCTET STRING
--*** Sequence Annotation *************************************
--*
-- This is a replica of Textseq-id
-- This is specific for annotations, and exists to maintain a semantic
-- difference between IDs assigned to annotations and IDs assigned to
-- sequences
-- Textannot-id: text identifier for an annotation; every field is
-- optional.
Textannot-id ::= SEQUENCE {
    name        VisibleString   OPTIONAL ,
    accession   VisibleString   OPTIONAL ,
    release     VisibleString   OPTIONAL ,
    version     INTEGER         OPTIONAL
}
-- Annot-id: identifier of an annotation, in one of four forms.
Annot-id ::= CHOICE {
    local   Object-id ,
    ncbi    INTEGER ,
    general Dbtag ,
    other   Textannot-id
}
-- Annot-descr: an unordered collection of Annotdesc descriptors.
Annot-descr ::=
    SET OF Annotdesc
-- Annotdesc: one descriptor of an annotation collection; a value carries
-- exactly one of the alternatives below.
Annotdesc ::= CHOICE {
    name        VisibleString , -- short name for the collection
    title       VisibleString , -- title for the collection
    comment     VisibleString , -- longer free-text comment
    pub         Pubdesc ,       -- reference to a publication
    user        User-object ,   -- user-defined object
    create-date Date ,          -- date the entry was first created/released
    update-date Date ,          -- date of the last update
    src         Seq-id ,        -- source sequence the annot came from
    align       Align-def ,     -- definition of the SeqAligns
    region      Seq-loc         -- region covered by all contents
}
-- Align-def: describes the class of an alignment Seq-annot.
Align-def ::= SEQUENCE {
    -- class of align Seq-annot
    align-type INTEGER {
        ref     (1) ,   -- set of alignments to the same sequence
        alt     (2) ,   -- set of alternate alignments of the same seqs
        blocks  (3) ,   -- set of aligned blocks in the same seqs
        other   (255)
    } ,
    ids SET OF Seq-id OPTIONAL  -- for now used for the one ref seqid
}
-- Seq-annot: a collection of annotation data of a single kind, with
-- optional ids, source database, name and descriptors.
Seq-annot ::= SEQUENCE {
    id SET OF Annot-id OPTIONAL ,

    -- source of the annotation
    db INTEGER {
        genbank (1) ,
        embl    (2) ,
        ddbj    (3) ,
        pir     (4) ,
        sp      (5) ,
        bbone   (6) ,
        pdb     (7) ,
        other   (255)
    } OPTIONAL ,

    name    VisibleString   OPTIONAL ,  -- the source when db is "other"
    desc    Annot-descr     OPTIONAL ,  -- only used for stand alone Seq-annots

    data CHOICE {
        ftable      SET OF Seq-feat ,
        align       SET OF Seq-align ,
        graph       SET OF Seq-graph ,
        ids         SET OF Seq-id ,     -- used for communication between tools
        locs        SET OF Seq-loc ,    -- used for communication between tools
        seq-table   Seq-table           -- features in table form
    }
}
END