1397 lines
49 KiB
Groff
1397 lines
49 KiB
Groff
--$Revision: 509379 $
|
|
--**********************************************************************
|
|
--
|
|
-- NCBI Sequence Feature elements
|
|
-- by James Ostell, 1990
|
|
-- Version 3.0 - June 1994
|
|
--
|
|
--**********************************************************************
|
|
|
|
NCBI-Seqfeat DEFINITIONS ::=
|
|
BEGIN
|
|
|
|
EXPORTS Seq-feat, Feat-id, Genetic-code, ModelEvidenceSupport;
|
|
|
|
IMPORTS Gene-ref FROM NCBI-Gene
|
|
Prot-ref FROM NCBI-Protein
|
|
Org-ref FROM NCBI-Organism
|
|
Variation-ref FROM NCBI-Variation
|
|
BioSource FROM NCBI-BioSource
|
|
RNA-ref FROM NCBI-RNA
|
|
Seq-id, Seq-loc, Giimport-id FROM NCBI-Seqloc
|
|
Pubdesc, Numbering, Heterogen FROM NCBI-Sequence
|
|
Rsite-ref FROM NCBI-Rsite
|
|
Txinit FROM NCBI-TxInit
|
|
DOI, PubMedId FROM NCBI-Biblio
|
|
Pub-set FROM NCBI-Pub
|
|
Object-id, Dbtag, User-object FROM NCBI-General;
|
|
|
|
--*** Feature identifiers ********************************
|
|
--*
|
|
|
|
-- Feat-id: identifier of a feature, given as exactly one of the four
-- alternatives below.
Feat-id ::= CHOICE {
    gibb INTEGER ,       -- geninfo backbone
    giim Giimport-id ,   -- geninfo import
    local Object-id ,    -- for local software use
    general Dbtag }      -- for use by various databases
|
|
|
|
--*** Seq-feat *******************************************
|
|
--* sequence feature generalization
|
|
|
|
-- Seq-feat: one sequence feature. Only 'data' and 'location' are mandatory;
-- every other field is OPTIONAL.
Seq-feat ::= SEQUENCE {
    id Feat-id OPTIONAL ,
    data SeqFeatData ,                    -- the specific data
    partial BOOLEAN OPTIONAL ,            -- incomplete in some way?
    except BOOLEAN OPTIONAL ,             -- something funny about this?
    comment VisibleString OPTIONAL ,
    product Seq-loc OPTIONAL ,            -- product of process
    location Seq-loc ,                    -- feature made from
    qual SEQUENCE OF Gb-qual OPTIONAL ,   -- qualifiers
    title VisibleString OPTIONAL ,        -- for user defined label
    ext User-object OPTIONAL ,            -- user defined structure extension
    cit Pub-set OPTIONAL ,                -- citations for this feature
    exp-ev ENUMERATED {                   -- evidence for existence of feature
        experimental (1) ,                -- any reasonable experimental check
        not-experimental (2) } OPTIONAL , -- similarity, pattern, etc
    xref SET OF SeqFeatXref OPTIONAL ,    -- cite other relevant features
    dbxref SET OF Dbtag OPTIONAL ,        -- support for xref to other databases
    pseudo BOOLEAN OPTIONAL ,             -- annotated on pseudogene?
    except-text VisibleString OPTIONAL ,  -- explain if except=TRUE
    ids SET OF Feat-id OPTIONAL ,         -- set of Ids; will replace 'id' field
    exts SET OF User-object OPTIONAL ,    -- set of extensions; will replace 'ext' field
    support SeqFeatSupport OPTIONAL       -- will replace /experiment, /inference, model-evidence
}
|
|
|
|
-- SeqFeatData: the payload of a feature; exactly one alternative is present.
-- 'bond', 'site' and 'psec-str' are inline enumerations; the remaining
-- alternatives reference types defined in this or in imported modules.
SeqFeatData ::= CHOICE {
    gene Gene-ref ,
    org Org-ref ,
    cdregion Cdregion ,
    prot Prot-ref ,
    rna RNA-ref ,
    pub Pubdesc ,                 -- publication applies to this seq
    seq Seq-loc ,                 -- to annotate origin from another seq
    imp Imp-feat ,
    region VisibleString,         -- named region (globin locus)
    comment NULL ,                -- just a comment
    bond ENUMERATED {
        disulfide (1) ,
        thiolester (2) ,
        xlink (3) ,
        thioether (4) ,
        other (255) } ,
    site ENUMERATED {
        active (1) ,
        binding (2) ,
        cleavage (3) ,
        inhibit (4) ,
        modified (5),
        glycosylation (6) ,
        myristoylation (7) ,
        mutagenized (8) ,
        metal-binding (9) ,
        phosphorylation (10) ,
        acetylation (11) ,
        amidation (12) ,
        methylation (13) ,
        hydroxylation (14) ,
        sulfatation (15) ,
        oxidative-deamination (16) ,
        pyrrolidone-carboxylic-acid (17) ,
        gamma-carboxyglutamic-acid (18) ,
        blocked (19) ,
        lipid-binding (20) ,
        np-binding (21) ,
        dna-binding (22) ,
        signal-peptide (23) ,
        transit-peptide (24) ,
        transmembrane-region (25) ,
        nitrosylation (26) ,
        other (255) } ,
    rsite Rsite-ref ,             -- restriction site (for maps really)
    user User-object ,            -- user defined structure
    txinit Txinit ,               -- transcription initiation
    num Numbering ,               -- a numbering system
    psec-str ENUMERATED {         -- protein secondary structure
        helix (1) ,               -- any helix
        sheet (2) ,               -- beta sheet
        turn (3) } ,              -- beta or gamma turn
    non-std-residue VisibleString , -- non-standard residue here in seq
    het Heterogen ,               -- cofactor, prosthetic grp, etc, bound to seq
    biosrc BioSource,
    clone Clone-ref,
    variation Variation-ref
}
|
|
|
|
-- SeqFeatXref: cross-reference to another feature, by id, by data, or both.
SeqFeatXref ::= SEQUENCE {        -- both optional because can have one or both
    id Feat-id OPTIONAL ,         -- the feature copied
    data SeqFeatData OPTIONAL }   -- the specific data
|
|
|
|
-- SeqFeatSupport: groups the three kinds of supporting evidence that can be
-- attached to a feature; each set is OPTIONAL.
SeqFeatSupport ::= SEQUENCE {
    experiment SET OF ExperimentSupport OPTIONAL ,
    inference SET OF InferenceSupport OPTIONAL ,
    model-evidence SET OF ModelEvidenceSupport OPTIONAL
}
|
|
|
|
-- EvidenceCategory: named integer values used by the 'category' field of
-- ExperimentSupport and InferenceSupport.
EvidenceCategory ::= INTEGER {
    not-set (0) ,
    coordinates (1) ,
    description (2) ,
    existence (3)
}
|
|
|
|
-- ExperimentSupport: experimental evidence entry. 'explanation' is the only
-- mandatory field; literature references are given as PubMed ids and/or DOIs.
ExperimentSupport ::= SEQUENCE {
    category EvidenceCategory OPTIONAL ,
    explanation VisibleString ,
    pmids SET OF PubMedId OPTIONAL ,
    dois SET OF DOI OPTIONAL
}
|
|
|
|
-- Program-id: a program name with an optional version string.
Program-id ::= SEQUENCE {
    name VisibleString ,
    version VisibleString OPTIONAL
}
|
|
|
|
-- EvidenceBasis: the programs and/or sequence accessions an inference is
-- based on; both sets are OPTIONAL.
EvidenceBasis ::= SEQUENCE {
    programs SET OF Program-id OPTIONAL ,
    accessions SET OF Seq-id OPTIONAL
}
|
|
|
|
-- InferenceSupport: inferential (non-experimental) evidence entry.
-- 'basis' is mandatory; 'type' defaults to not-set and 'same-species'
-- defaults to FALSE.
InferenceSupport ::= SEQUENCE {
    category EvidenceCategory OPTIONAL ,
    type INTEGER {
        not-set (0) ,
        similar-to-sequence (1) ,
        similar-to-aa (2) ,
        similar-to-dna (3) ,
        similar-to-rna (4) ,
        similar-to-mrna (5) ,
        -- NOTE(review): 'similiar' looks like a misspelling of 'similar'; the
        -- identifier is left unchanged because it is part of the schema.
        similiar-to-est (6) ,
        similar-to-other-rna (7) ,
        profile (8) ,
        nucleotide-motif (9) ,
        protein-motif (10) ,
        ab-initio-prediction (11) ,
        alignment (12) ,
        other (255)
    } DEFAULT not-set ,
    other-type VisibleString OPTIONAL ,
    same-species BOOLEAN DEFAULT FALSE ,
    basis EvidenceBasis ,
    pmids SET OF PubMedId OPTIONAL ,
    dois SET OF DOI OPTIONAL
}
|
|
|
|
-- ModelEvidenceItem: one supporting sequence (identified by 'id') together
-- with optional exon statistics and two flags that default to FALSE.
ModelEvidenceItem ::= SEQUENCE {
    id Seq-id ,
    exon-count INTEGER OPTIONAL ,
    exon-length INTEGER OPTIONAL ,
    full-length BOOLEAN DEFAULT FALSE ,
    supports-all-exon-combo BOOLEAN DEFAULT FALSE
}
|
|
|
|
-- ModelEvidenceSupport: model evidence for a feature, with supporting items
-- grouped into 'mrna', 'est' and 'protein' sets. All fields are OPTIONAL or
-- carry a DEFAULT.
ModelEvidenceSupport ::= SEQUENCE {
    method VisibleString OPTIONAL ,
    mrna SET OF ModelEvidenceItem OPTIONAL ,
    est SET OF ModelEvidenceItem OPTIONAL ,
    protein SET OF ModelEvidenceItem OPTIONAL ,
    identification Seq-id OPTIONAL ,
    dbxref SET OF Dbtag OPTIONAL ,
    exon-count INTEGER OPTIONAL ,
    exon-length INTEGER OPTIONAL ,
    full-length BOOLEAN DEFAULT FALSE ,
    supports-all-exon-combo BOOLEAN DEFAULT FALSE
}
|
|
|
|
--*** CdRegion ***********************************************
|
|
--*
|
|
--* Instructions to translate from a nucleic acid to a peptide
|
|
--* conflict means it's supposed to translate but doesn't
|
|
--*
|
|
|
|
|
|
-- Cdregion: coding-region data. Every field is OPTIONAL except 'frame',
-- which defaults to not-set.
Cdregion ::= SEQUENCE {
    orf BOOLEAN OPTIONAL ,                       -- just an ORF ?
    frame ENUMERATED {
        not-set (0) ,                            -- not set, code uses one
        one (1) ,
        two (2) ,
        three (3) } DEFAULT not-set ,            -- reading frame
    conflict BOOLEAN OPTIONAL ,                  -- conflict
    gaps INTEGER OPTIONAL ,                      -- number of gaps on conflict/except
    mismatch INTEGER OPTIONAL ,                  -- number of mismatches on above
    code Genetic-code OPTIONAL ,                 -- genetic code used
    code-break SEQUENCE OF Code-break OPTIONAL , -- individual exceptions
    stops INTEGER OPTIONAL }                     -- number of stop codons on above
|
|
|
|
-- each code is 64 cells long, in the order where
-- T=0,C=1,A=2,G=3, TTT=0, TTC=1, TCA=6, etc
-- (cell index = 16*first base + 4*second base + third base)
-- NOTE: this order does NOT correspond to a Seq-data
-- encoding. It is "natural" to codon usage instead.
-- the value in each cell is the AA coded for
-- start= AA coded only if first in peptide
-- in start array, if codon is not a legitimate start
-- codon, that cell will have the "gap" symbol for
-- that alphabet. Otherwise it will have the AA
-- encoded when that codon is used at the start.

Genetic-code ::= SET OF CHOICE {
    name VisibleString ,        -- name of a code
    id INTEGER ,                -- id in dbase
    ncbieaa VisibleString ,     -- indexed to IUPAC extended
    ncbi8aa OCTET STRING ,      -- indexed to NCBI8aa
    ncbistdaa OCTET STRING ,    -- indexed to NCBIstdaa
    sncbieaa VisibleString ,    -- start, indexed to IUPAC extended
    sncbi8aa OCTET STRING ,     -- start, indexed to NCBI8aa
    sncbistdaa OCTET STRING }   -- start, indexed to NCBIstdaa
|
|
|
|
-- Code-break: a single codon exception, made of a location and the amino
-- acid to use there, expressed in one of three integer encodings.
Code-break ::= SEQUENCE {       -- specific codon exceptions
    loc Seq-loc ,               -- location of exception
    aa CHOICE {                 -- the amino acid
        ncbieaa INTEGER ,       -- ASCII value of NCBIeaa code
        ncbi8aa INTEGER ,       -- NCBI8aa code
        ncbistdaa INTEGER } }   -- NCBIstdaa code
|
|
|
|
-- Genetic-code-table: an unordered collection of Genetic-code values.
Genetic-code-table ::= SET OF Genetic-code -- table of genetic codes
|
|
|
|
--*** Import ***********************************************
|
|
--*
|
|
--* Features imported from other databases
|
|
--*
|
|
|
|
-- Imp-feat: an imported feature, identified by a mandatory 'key' string
-- with optional location and description text.
Imp-feat ::= SEQUENCE {
    key VisibleString ,
    loc VisibleString OPTIONAL ,     -- original location string
    descr VisibleString OPTIONAL }   -- text description
|
|
|
|
-- Gb-qual: a qualifier as a (qual, val) pair of strings; both are mandatory.
Gb-qual ::= SEQUENCE {
    qual VisibleString ,
    val VisibleString }
|
|
|
|
|
|
--*** Clone-ref ***********************************************
|
|
--*
|
|
--* Specification of clone features
|
|
--*
|
|
|
|
-- Clone-ref: description of a clone. 'name' is mandatory; 'concordant' and
-- 'unique' default to FALSE; the remaining fields are OPTIONAL.
Clone-ref ::= SEQUENCE {
    name VisibleString,                 -- Official clone symbol
    library VisibleString OPTIONAL,     -- Library name

    concordant BOOLEAN DEFAULT FALSE,   -- OPTIONAL?
    unique BOOLEAN DEFAULT FALSE,       -- OPTIONAL?
    placement-method INTEGER {
        end-seq (0),                    -- Clone placed by end sequence
        insert-alignment (1),           -- Clone placed by insert alignment
        sts (2),                        -- Clone placed by STS
        fish (3),
        fingerprint (4),
        end-seq-insert-alignment (5),   -- combined end-seq and insert align
        external (253),                 -- Placement provided externally
        curated (254),                  -- Human placed or approved
        other (255)
    } OPTIONAL,
    clone-seq Clone-seq-set OPTIONAL
}
|
|
|
|
-- Clone-seq-set: an unordered collection of Clone-seq entries.
Clone-seq-set ::= SET OF Clone-seq
|
|
|
|
|
|
-- Clone-seq: one sequence associated with a clone. 'type' and 'location'
-- are mandatory; the other fields are OPTIONAL.
Clone-seq ::= SEQUENCE {
    type INTEGER {
        insert (0),
        end (1),
        other (255)
    },
    confidence INTEGER {
        multiple (0),        -- Multiple hits
        na (1),              -- Unspecified
        nohit-rep (2),       -- No hits, end flagged repetitive
        nohitnorep (3),      -- No hits, end not flagged repetitive
        other-chrm (4),      -- Hit on different chromosome
        unique (5),
        virtual (6),         -- Virtual (hasn't been sequenced)
        multiple-rep (7),    -- Multiple hits, end flagged repetitive
        multiplenorep (8),   -- Multiple hits, end not flagged repetitive
        no-hit (9),          -- No hits
        other (255)
    } OPTIONAL,
    location Seq-loc,        -- location on sequence
    seq Seq-loc OPTIONAL,    -- clone sequence location
    align-id Dbtag OPTIONAL, -- internal alignment identifier
    support INTEGER {
        prototype (0),       -- sequence used to place clone
        supporting (1),      -- sequence supports placement
        supports-other(2),   -- supports a different placement
        non-supporting (3)   -- does not support any placement
    } OPTIONAL
}
|
|
|
|
END
|
|
|
|
|
|
--*** Variation-ref ***********************************************
|
|
--*
|
|
--* Specification of variation features
|
|
--*
|
|
|
|
NCBI-Variation DEFINITIONS ::=
|
|
BEGIN
|
|
|
|
EXPORTS Variation-ref, Variation-inst, VariantProperties,
|
|
Population-data, Phenotype;
|
|
|
|
IMPORTS Int-fuzz, User-object, Object-id, Dbtag FROM NCBI-General
|
|
Seq-literal FROM NCBI-Sequence
|
|
SubSource FROM NCBI-BioSource
|
|
Seq-loc FROM NCBI-Seqloc
|
|
Pub FROM NCBI-Pub;
|
|
|
|
|
|
-- --------------------------------------------------------------------------
|
|
-- Historically, the dbSNP definitions document data structures used in the
|
|
-- processing and annotation of variations by the dbSNP group. The intention
|
|
-- is to provide information to clients that reflects internal information
|
|
-- produced during the mapping of SNPs
|
|
-- --------------------------------------------------------------------------
|
|
|
|
-- VariantProperties: property fields describing a variant. Only 'version'
-- is mandatory. Most INTEGER fields are bit-fields (values may be summed);
-- 'map-weight' and 'allele-state' are explicitly NOT bit-fields.
VariantProperties ::= SEQUENCE {
    version INTEGER,

    -- NOTE:
    -- The format for most of these values is as an integer
    -- Unless otherwise noted, these integers represent a bitwise OR (= simple
    -- sum) of the possible values, and as such, these values represent the
    -- specific bit flags that may be set for each of the possible attributes
    -- here.

    resource-link INTEGER {
        preserved (1),         -- Clinical, Pubmed, Cited, (0x01)
        provisional (2),       -- Provisional Third Party Annotations (0x02)
        has3D (4),             -- Has 3D structure SNP3D table (0x04)
        submitterLinkout (8),  -- SNP->SubSNP->Batch link_out (0x08)
        clinical (16),         -- Clinical if LSDB, OMIM, TPA, Diagnostic (0x10)
        genotypeKit (32)       -- Marker exists on high density genotyping kit
                               -- (0x20)
    } OPTIONAL,

    gene-location INTEGER {
        in-gene (1),           -- Sequence intervals covered by a gene ID but not
                               -- having an aligned transcript (0x01)
        near-gene-5 (2),       -- Within 2kb of the 5' end of a gene feature
        near-gene-3 (4),       -- Within 0.5kb of the 3' end of a gene feature
        intron (8),            -- In Intron (0x08)
        donor (16),            -- In donor splice-site (0x10)
        acceptor (32),         -- In acceptor splice-site (0x20)
        utr-5 (64),            -- In 5' UTR (0x40)
        utr-3 (128),           -- In 3' UTR (0x80)
        in-start-codon(256),   -- the variant is observed in a start codon
                               -- (0x100)
        in-stop-codon (512),   -- the variant is observed in a stop codon
                               -- (0x200)
        intergenic (1024),     -- variant located between genes (0x400)
        conserved-noncoding(2048) -- variant is located in a conserved
                               -- non-coding region (0x800)
    } OPTIONAL,

    effect INTEGER {
        no-change (0),         -- known to cause no functional changes
                               -- since 0 does not combine with any other bit
                               -- value, 'no-change' specifically implies that
                               -- there are no consequences
        synonymous (1),        -- one allele in the set does not change the encoded
                               -- amino acid (0x1)
        nonsense (2),          -- one allele in the set changes to STOP codon
                               -- (TER). (0x2)
        missense (4),          -- one allele in the set changes protein peptide
                               -- (0x4)
        frameshift (8),        -- one allele in the set changes all downstream
                               -- amino acids (0x8)

        up-regulator (16),     -- the variant causes increased transcription
                               -- (0x10)
        down-regulator(32),    -- the variant causes decreased transcription
                               -- (0x20)
        methylation (64),
        stop-gain (128),       -- reference codon is not stop codon, but the snp
                               -- variant allele changes the codon to a
                               -- terminating codon.
        stop-loss (256)        -- reverse of STOP-GAIN: reference codon is a
                               -- stop codon, but a snp variant allele changes
                               -- the codon to a non-terminating codon.
    } OPTIONAL,

    mapping INTEGER {
        has-other-snp (1),         -- Another SNP has the same mapped positions
                                   -- on reference assembly (0x01)
        has-assembly-conflict (2), -- Weight 1 or 2 SNPs that map to different
                                   -- chromosomes on different assemblies (0x02)
        is-assembly-specific (4)   -- Only maps to 1 assembly (0x04)
    } OPTIONAL,

    -- map-weight captures specificity of placement
    -- NOTE: This is *NOT* a bitfield
    map-weight INTEGER {
        is-uniquely-placed(1),
        placed-twice-on-same-chrom(2),
        placed-twice-on-diff-chrom(3),
        many-placements(10)
    } OPTIONAL,

    frequency-based-validation INTEGER {
        is-mutation (1),       -- low frequency variation that is cited in
                               -- journal or other reputable sources (0x01)
        above-5pct-all (2),    -- >5% minor allele freq in each and all
                               -- populations (0x02)
        above-5pct-1plus (4),  -- >5% minor allele freq in 1+ populations (0x04)
        validated (8),         -- Bit is set if the variant has a minor allele
                               -- observed in two or more separate chromosomes
        above-1pct-all (16),   -- >1% minor allele freq in each and all
                               -- populations (0x10)
        above-1pct-1plus (32)  -- >1% minor allele freq in 1+ populations (0x20)
    } OPTIONAL,

    genotype INTEGER {
        in-haplotype-set (1),  -- Exists in a haplotype tagging set (0x01)
        has-genotypes (2)      -- SNP has individual genotype (0x02)
    } OPTIONAL,

    -- project IDs are IDs from BioProjects
    -- in order to report information about project relationships, we
    -- require projects to be registered
    -- This field in many ways duplicates dbxrefs; however, the
    -- intention of this field is to more adequately reflect
    -- ownership and data source
    --
    -- 11/9/2010: DO NOT USE
    -- This field was changed in the spec in a breaking way; using it will
    -- break clients. We are officially suppressing / abandoning this field.
    -- Clients who need to use this should instead place the data in
    -- Seq-feat.dbxref, using the db name 'BioProject'
    project-data SET OF INTEGER OPTIONAL,

    quality-check INTEGER {
        contig-allele-missing (1),   -- Reference sequence allele at the mapped
                                     -- position is not present in the SNP
                                     -- allele list, adjusted for orientation
                                     -- (0x01)
        withdrawn-by-submitter (2),  -- One member SS is withdrawn by submitter
                                     -- (0x02)
        non-overlapping-alleles (4), -- RS set has 2+ alleles from different
                                     -- submissions and these sets share no
                                     -- alleles in common (0x04)
        strain-specific (8),         -- Strain specific fixed difference (0x08)
        genotype-conflict (16)       -- Has Genotype Conflict (0x10)
    } OPTIONAL,

    confidence INTEGER {
        unknown (0),
        likely-artifact (1),
        other (255)
    } OPTIONAL,

    -- has this variant been validated?
    -- While a boolean flag offers no subtle distinctions of validation
    -- methods, occasionally it is only known as a single boolean value
    -- NOTE: this flag is redundant and should be omitted if more comprehensive
    -- validation information is present
    other-validation BOOLEAN OPTIONAL,

    -- origin of this allele, if known
    -- note that these are powers-of-two, and represent bits; thus, we can
    -- represent more than one state simultaneously through a bitwise OR
    allele-origin INTEGER {
        unknown (0),
        germline (1),
        somatic (2),
        inherited (4),
        paternal (8),
        maternal (16),
        de-novo (32),
        biparental (64),
        uniparental (128),
        not-tested (256),
        tested-inconclusive (512),
        not-reported (1024),

        -- stopper - 2^30 (1073741824)
        other (1073741824)
    } OPTIONAL,

    -- observed allele state, if known
    -- NOTE: THIS IS NOT A BITFIELD!
    allele-state INTEGER {
        unknown (0),
        homozygous (1),
        heterozygous (2),
        hemizygous (3),
        nullizygous (4),
        other (255)
    } OPTIONAL,

    -- NOTE:
    -- 'allele-frequency' here refers to the minor allele frequency of the
    -- default population
    allele-frequency REAL OPTIONAL,

    -- is this variant the ancestral allele?
    is-ancestral-allele BOOLEAN OPTIONAL
}
|
|
|
|
-- Phenotype: a phenotype annotation; every field is OPTIONAL.
Phenotype ::= SEQUENCE {
    source VisibleString OPTIONAL,
    term VisibleString OPTIONAL,
    xref SET OF Dbtag OPTIONAL,

    -- does this variant have known clinical significance?
    clinical-significance INTEGER {
        unknown (0),
        untested (1),
        non-pathogenic (2),
        probable-non-pathogenic (3),
        probable-pathogenic (4),
        pathogenic (5),
        drug-response (6),
        histocompatibility (7),
        other (255)
    } OPTIONAL
}
|
|
|
|
-- Population-data: per-population observations. 'population' is the only
-- mandatory field.
Population-data ::= SEQUENCE {
    -- assayed population (e.g. HAPMAP-CEU)
    population VisibleString,
    genotype-frequency REAL OPTIONAL,
    chromosomes-tested INTEGER OPTIONAL,
    sample-ids SET OF Object-id OPTIONAL,
    allele-frequency REAL OPTIONAL,

    -- This field is an explicit bit-field
    -- Valid values should be a bitwise combination (= simple sum)
    -- of any of the values below
    flags INTEGER {
        is-default-population (1),
        is-minor-allele (2),
        is-rare-allele (4)
    } OPTIONAL
}
|
|
|
|
-- Ext-loc: a location paired with an identifier; both fields are mandatory.
Ext-loc ::= SEQUENCE {
    id Object-id,
    location Seq-loc
}
|
|
|
|
|
|
-- Variation-ref: description of a variation. 'data' is the only mandatory
-- field. The type is recursive: 'data.set.variations' and
-- 'consequence.variation' both contain Variation-ref values. Several fields
-- are marked DEPRECATED in their own comments below.
Variation-ref ::= SEQUENCE {
    -- ids (i.e., SNP rsid / ssid, dbVar nsv/nssv)
    -- expected values include 'dbSNP|rs12334', 'dbSNP|ss12345', 'dbVar|nsv1'
    --
    -- we relate three kinds of IDs here:
    --  - our current object's id
    --  - the id of this object's parent, if it exists
    --  - the sample ID that this item originates from
    id Dbtag OPTIONAL,
    parent-id Dbtag OPTIONAL,
    sample-id Object-id OPTIONAL,
    other-ids SET OF Dbtag OPTIONAL,

    -- names and synonyms
    -- some variants have well-known canonical names and possible accepted
    -- synonyms
    name VisibleString OPTIONAL,
    synonyms SET OF VisibleString OPTIONAL,

    -- tag for comment and descriptions
    description VisibleString OPTIONAL,

    -- phenotype
    phenotype SET OF Phenotype OPTIONAL,

    -- sequencing / acquisition method
    method SET OF INTEGER {
        unknown (0),
        bac-acgh (1),
        computational (2),
        curated (3),
        digital-array (4),
        expression-array (5),
        fish (6),
        flanking-sequence (7),
        maph (8),
        mcd-analysis (9),
        mlpa (10),
        oea-assembly (11),
        oligo-acgh (12),
        paired-end (13),
        pcr (14),
        qpcr (15),
        read-depth (16),
        roma (17),
        rt-pcr (18),
        sage (19),
        sequence-alignment (20),
        sequencing (21),
        snp-array (22),
        -- NOTE(review): 'genoytyping' looks like a misspelling of
        -- 'genotyping'; the identifier is left unchanged because it is part
        -- of the schema.
        snp-genoytyping (23),
        southern (24),
        western (25),
        optical-mapping (26),

        other (255)
    } OPTIONAL,

    -- Note about SNP representation and pertinent fields: allele-frequency,
    -- population, quality-codes:
    -- The case of multiple alleles for a SNP would be described by
    -- parent-feature of type Variation-set.diff-alleles, where the child
    -- features of type Variation-inst, all at the same location, would
    -- describe individual alleles.

    -- population data
    -- DEPRECATED - do not use
    population-data SET OF Population-data OPTIONAL,

    -- variant properties bit fields
    variant-prop VariantProperties OPTIONAL,

    -- has this variant been validated?
    -- DEPRECATED: new field = VariantProperties.other-validation
    validated BOOLEAN OPTIONAL,

    -- link-outs to GeneTests database
    -- DEPRECATED - do not use
    clinical-test SET OF Dbtag OPTIONAL,

    -- origin of this allele, if known
    -- note that these are powers-of-two, and represent bits; thus, we can
    -- represent more than one state simultaneously through a bitwise OR
    -- DEPRECATED: new field = VariantProperties.allele-origin
    allele-origin INTEGER {
        unknown (0),
        germline (1),
        somatic (2),
        inherited (4),
        paternal (8),
        maternal (16),
        de-novo (32),
        biparental (64),
        uniparental (128),
        not-tested (256),
        tested-inconclusive (512),

        -- stopper - 2^30 (1073741824)
        other (1073741824)
    } OPTIONAL,

    -- observed allele state, if known
    -- DEPRECATED: new field = VariantProperties.allele-state
    allele-state INTEGER {
        unknown (0),
        homozygous (1),
        heterozygous (2),
        hemizygous (3),
        nullizygous (4),
        other (255)
    } OPTIONAL,

    -- NOTE:
    -- 'allele-frequency' here refers to the minor allele frequency of the
    -- default population
    -- DEPRECATED: new field = VariantProperties.allele-frequency
    allele-frequency REAL OPTIONAL,

    -- is this variant the ancestral allele?
    -- DEPRECATED: new field = VariantProperties.is-ancestral-allele
    is-ancestral-allele BOOLEAN OPTIONAL,

    -- publication support.
    -- Note: made this pub instead of pub-equiv, since
    -- Pub can be pub-equiv and pub-equiv is a set of pubs, but it looks like
    -- Pub is more often used as top-level container
    -- DEPRECATED - do not use; use Seq-feat.dbxref instead
    pub Pub OPTIONAL,

    data CHOICE {
        unknown NULL,
        note VisibleString, --free-form
        uniparental-disomy NULL,

        -- actual sequence-edit at feat.location
        instance Variation-inst,

        -- Set of related Variations.
        -- Location of the set equals the union of member locations
        set SEQUENCE {
            type INTEGER {
                unknown (0),
                compound (1),    -- complex change at the same location on the
                                 -- same molecule
                products (2),    -- different products arising from the same
                                 -- variation in a precursor, e.g. r.[13g>a,
                                 -- 13_88del]
                haplotype (3),   -- changes on the same allele, e.g
                                 -- r.[13g>a;15u>c]
                genotype (4),    -- changes on different alleles in the same
                                 -- genotype, e.g. g.[476C>T]+[476C>T]
                mosaic (5),      -- different genotypes in the same individual
                individual (6),  -- same organism; allele relationship unknown,
                                 -- e.g. g.[476C>T(+)183G>C]
                population (7),  -- population
                alleles (8),     -- set represents a set of observed alleles
                package (9),     -- set represents a package of observations at
                                 -- a given location, generally containing
                                 -- asserted + reference
                other (255)
            },
            variations SET OF Variation-ref,
            name VisibleString OPTIONAL
        },

        -- variant is a complex and undescribed change at the location
        -- This type of variant is known to occur in dbVar submissions
        complex NULL
    },

    consequence SET OF CHOICE {
        unknown NULL,
        splicing NULL, --some effect on splicing
        note VisibleString, --freeform

        -- Describe resulting variation in the product, e.g. missense,
        -- nonsense, silent, neutral, etc in a protein, that arises from
        -- THIS variation.
        variation Variation-ref,

        -- see http://www.hgvs.org/mutnomen/recs-prot.html
        frameshift SEQUENCE {
            phase INTEGER OPTIONAL,
            x-length INTEGER OPTIONAL
        },

        loss-of-heterozygosity SEQUENCE {
            -- In germline comparison, it will be reference genome assembly
            -- (default) or reference/normal population. In somatic mutation,
            -- it will be a name of the normal tissue.
            reference VisibleString OPTIONAL,

            -- Name of the testing subject type or the testing tissue.
            test VisibleString OPTIONAL
        }
    } OPTIONAL,

    -- Observed location, if different from the parent set or feature.location.
    -- DEPRECATED - do not use
    location Seq-loc OPTIONAL,

    -- reference other locs, e.g. mapped source
    -- DEPRECATED - do not use
    ext-locs SET OF Ext-loc OPTIONAL,

    -- DEPRECATED - do not use; use Seq-feat.exts instead
    ext User-object OPTIONAL,

    somatic-origin SET OF SEQUENCE {
        -- description of the somatic origin itself
        source SubSource OPTIONAL,
        -- condition related to this origin's type
        condition SEQUENCE {
            description VisibleString OPTIONAL,
            -- reference to BioTerm / other descriptive database
            object-id SET OF Dbtag OPTIONAL
        } OPTIONAL
    } OPTIONAL

}
|
|
|
|
|
|
-- Delta-item: one element of a Variation-inst 'delta' list, consisting of an
-- optional sequence, an optional multiplier (with optional fuzz) and an
-- action that defaults to morph.
Delta-item ::= SEQUENCE {
    seq CHOICE {
        literal Seq-literal,
        loc Seq-loc,
        this NULL --same location as variation-ref itself
    } OPTIONAL,

    -- Multiplier allows representing a tandem, e.g. ATATAT as AT*3
    -- This allows describing CNV/SSR where delta=self with a
    -- multiplier which specifies the count of the repeat unit.

    multiplier INTEGER OPTIONAL, --assumed 1 if not specified.
    multiplier-fuzz Int-fuzz OPTIONAL,

    action INTEGER {

        -- replace len(seq) positions starting with location.start with seq
        morph (0),

        -- go downstream by distance specified by multiplier (upstream if < 0),
        -- in genomic context.
        offset (1),

        -- excise sequence at location
        -- if multiplier is specified, delete len(location)*multiplier
        -- positions downstream
        del-at (2),

        -- insert seq before the location.start
        ins-before (3)

    } DEFAULT morph
}
|
|
|
|
|
|
-- Variation instance
|
|
-- Variation-inst: a single sequence edit, made of a mandatory 'type', a
-- mandatory ordered 'delta' list of Delta-item, and an optional
-- 'observation' bit-field. The comments on each 'type' value describe the
-- expected shape of 'delta'.
Variation-inst ::= SEQUENCE {
    type INTEGER {
        unknown (0),          -- delta=[]
        identity (1),         -- delta=[]
        inv (2),              -- delta=[del, ins.seq=
                              -- RevComp(variation-location)]
        snv (3),              -- delta=[morph of length 1]
                              -- NOTE: this is snV not snP; the latter
                              -- requires frequency-based validation to be
                              -- established in VariantProperties
                              -- the strict definition of SNP is an SNV with
                              -- an established population frequency of at
                              -- least 1% in at least 1 population
        mnp (4),              -- delta=[morph of length >1]
        delins (5),           -- delta=[del, ins]
        del (6),              -- delta=[del]
        ins (7),              -- delta=[ins]
        microsatellite (8),   -- delta=[del, ins.seq= repeat-unit with fuzzy
                              -- multiplier]
                              -- variation-location is the microsat expansion
                              -- on the sequence
        transposon (9),       -- delta=[del, ins.seq= known donor or 'this']
                              -- variation-location is equiv of transposon
                              -- locs.
        cnv (10),             -- delta=[del, ins= 'this' with fuzzy
                              -- multiplier]
        direct-copy (11),     -- delta=[ins.seq= upstream location on the
                              -- same strand]
        rev-direct-copy (12), -- delta=[ins.seq= downstream location on the
                              -- same strand]
        inverted-copy (13),   -- delta=[ins.seq= upstream location on the
                              -- opposite strand]
        everted-copy (14),    -- delta=[ins.seq= downstream location on the
                              -- opposite strand]
        translocation (15),   -- delta=like delins
        prot-missense (16),   -- delta=[morph of length 1]
        prot-nonsense (17),   -- delta=[del]; variation-location is the tail
                              -- of the protein being truncated
        prot-neutral (18),    -- delta=[morph of length 1]
        prot-silent (19),     -- delta=[morph of length 1, same AA as at
                              -- variation-location]
        prot-other (20),      -- delta=any

        other (255)           -- delta=any
    },

    -- Sequence that replaces the location, in biological order.
    delta SEQUENCE OF Delta-item,

    -- 'observation' is used to label items in a Variation-ref package
    -- This field is explicitly a bit-field, so the bitwise OR (= sum) of any
    -- of the values may be observed.
    observation INTEGER {
        asserted (1),         -- inst represents the asserted base at a
                              -- position
        reference (2),        -- inst represents the reference base at the
                              -- position
        variant (4)           -- inst represents the observed variant at a
                              -- given position
    } OPTIONAL
}
|
|
|
|
END
|
|
|
|
|
|
--**********************************************************************
|
|
--
|
|
-- NCBI Restriction Sites
|
|
-- by James Ostell, 1990
|
|
-- version 0.8
|
|
--
|
|
--**********************************************************************
|
|
|
|
NCBI-Rsite DEFINITIONS ::=
|
|
BEGIN
|
|
|
|
EXPORTS Rsite-ref;
|
|
|
|
IMPORTS Dbtag FROM NCBI-General;
|
|
|
|
-- Rsite-ref: reference to a restriction site, either as free text or as a
-- database tag.
Rsite-ref ::= CHOICE {
    str VisibleString ,   -- may be unparsable
    db Dbtag }            -- pointer to a restriction site database
|
|
|
|
END
|
|
|
|
--**********************************************************************
|
|
--
|
|
-- NCBI RNAs
|
|
-- by James Ostell, 1990
|
|
-- version 0.8
|
|
--
|
|
--**********************************************************************
|
|
|
|
NCBI-RNA DEFINITIONS ::=
|
|
BEGIN
|
|
|
|
EXPORTS RNA-ref, Trna-ext, RNA-gen, RNA-qual, RNA-qual-set;
|
|
|
|
IMPORTS Seq-loc FROM NCBI-Seqloc;
|
|
|
|
--*** rnas ***********************************************
|
|
--*
|
|
--* various rnas
|
|
--*
|
|
-- minimal RNA sequence
|
|
-- RNA-ref: an RNA feature. 'type' is mandatory; 'pseudo' and the 'ext'
-- extension (one of name, tRNA or gen) are OPTIONAL.
RNA-ref ::= SEQUENCE {
    type ENUMERATED {            -- type of RNA feature
        unknown (0) ,
        premsg (1) ,
        mRNA (2) ,
        tRNA (3) ,
        rRNA (4) ,
        snRNA (5) ,              -- will become ncRNA, with RNA-gen.class = snRNA
        scRNA (6) ,              -- will become ncRNA, with RNA-gen.class = scRNA
        snoRNA (7) ,             -- will become ncRNA, with RNA-gen.class = snoRNA
        ncRNA (8) ,              -- non-coding RNA; subsumes snRNA, scRNA, snoRNA
        tmRNA (9) ,
        miscRNA (10) ,
        other (255) } ,
    pseudo BOOLEAN OPTIONAL ,
    ext CHOICE {
        name VisibleString ,     -- for naming "other" type
        tRNA Trna-ext ,          -- for tRNAs
        gen RNA-gen } OPTIONAL   -- generic fields for ncRNA, tmRNA, miscRNA
}
|
|
|
|
-- Trna-ext: tRNA-specific data; every field is OPTIONAL. The amino acid is
-- given as an integer in one of four encodings.
Trna-ext ::= SEQUENCE {               -- tRNA feature extensions
    aa CHOICE {                       -- aa this carries
        iupacaa INTEGER ,
        ncbieaa INTEGER ,
        ncbi8aa INTEGER ,
        ncbistdaa INTEGER } OPTIONAL ,
    codon SET OF INTEGER OPTIONAL ,   -- codon(s) as in Genetic-code
    anticodon Seq-loc OPTIONAL }      -- location of anticodon
|
|
|
|
-- RNA-gen: generic RNA fields; every field is OPTIONAL.
RNA-gen ::= SEQUENCE {
    class VisibleString OPTIONAL ,   -- for ncRNAs, the class of non-coding RNA:
                                     -- examples: antisense_RNA, guide_RNA, snRNA
    product VisibleString OPTIONAL ,
    quals RNA-qual-set OPTIONAL      -- e.g., tag_peptide qualifier for tmRNAs
}
|
|
|
|
-- Additional data value for RNA-gen, held as a tag (qual) / value (val)
-- pair; both strings are required.
RNA-qual ::= SEQUENCE {
    qual VisibleString,
    val  VisibleString
}
|
|
|
|
-- Ordered collection of RNA-qual entries.
RNA-qual-set ::=
    SEQUENCE OF RNA-qual
|
|
|
|
END
|
|
|
|
--**********************************************************************
|
|
--
|
|
-- NCBI Genes
|
|
-- by James Ostell, 1990
|
|
-- version 0.8
|
|
--
|
|
--**********************************************************************
|
|
|
|
NCBI-Gene DEFINITIONS ::=
|
|
BEGIN
|
|
|
|
EXPORTS Gene-ref, Gene-nomenclature;
|
|
|
|
IMPORTS Dbtag FROM NCBI-General;
|
|
|
|
--*** Gene ***********************************************
|
|
--*
|
|
--* reference to a gene
|
|
--*
|
|
|
|
-- Reference to a gene.  Every member is optional except pseudo, which
-- defaults to FALSE.
Gene-ref ::= SEQUENCE {
    -- Official gene symbol
    locus       VisibleString OPTIONAL,
    -- Official allele designation
    allele      VisibleString OPTIONAL,
    -- descriptive name
    desc        VisibleString OPTIONAL,
    -- descriptive map location
    maploc      VisibleString OPTIONAL,
    -- pseudogene
    pseudo      BOOLEAN DEFAULT FALSE,
    -- ids in other dbases
    db          SET OF Dbtag OPTIONAL,
    -- synonyms for locus
    syn         SET OF VisibleString OPTIONAL,
    -- systematic gene name (e.g., MI0001, ORF0069)
    locus-tag   VisibleString OPTIONAL,
    formal-name Gene-nomenclature OPTIONAL
}
|
|
|
|
-- Nomenclature record used by Gene-ref.formal-name: a required status
-- plus optional symbol, name and source.
Gene-nomenclature ::= SEQUENCE {
    status ENUMERATED {
        unknown  (0),
        official (1),
        interim  (2)
    },
    symbol VisibleString OPTIONAL,
    name   VisibleString OPTIONAL,
    source Dbtag OPTIONAL
}
|
|
|
|
END
|
|
|
|
|
|
--**********************************************************************
|
|
--
|
|
-- NCBI Organism
|
|
-- by James Ostell, 1994
|
|
-- version 3.0
|
|
--
|
|
--**********************************************************************
|
|
|
|
NCBI-Organism DEFINITIONS ::=
|
|
BEGIN
|
|
|
|
EXPORTS Org-ref;
|
|
|
|
IMPORTS Dbtag FROM NCBI-General;
|
|
|
|
--*** Org-ref ***********************************************
|
|
--*
|
|
--* Reference to an organism
|
|
--* defines only the organism; lower levels of detail for biological
|
|
--* molecules are provided by the Source object
|
|
--*
|
|
|
|
-- Reference to an organism; every member is optional.
Org-ref ::= SEQUENCE {
    -- preferred formal name
    taxname VisibleString OPTIONAL,
    -- common name
    common  VisibleString OPTIONAL,
    -- unstructured modifiers
    mod     SET OF VisibleString OPTIONAL,
    -- ids in taxonomic or culture dbases
    db      SET OF Dbtag OPTIONAL,
    -- synonyms for taxname or common
    syn     SET OF VisibleString OPTIONAL,
    orgname OrgName OPTIONAL
}
|
|
|
|
|
|
-- Structured organism name; every member is optional.
OrgName ::= SEQUENCE {
    name CHOICE {
        -- genus/species type name
        binomial    BinomialOrgName,
        -- virus names are different
        virus       VisibleString,
        -- hybrid between organisms
        hybrid      MultiOrgName,
        -- some hybrids have genus x species name
        namedhybrid BinomialOrgName,
        -- when genus not known
        partial     PartialOrgName
    } OPTIONAL,
    -- attribution of name
    attrib  VisibleString OPTIONAL,
    mod     SEQUENCE OF OrgMod OPTIONAL,
    -- lineage with semicolon separators
    lineage VisibleString OPTIONAL,
    -- genetic code (see CdRegion)
    gcode   INTEGER OPTIONAL,
    -- mitochondrial genetic code
    mgcode  INTEGER OPTIONAL,
    -- GenBank division code
    div     VisibleString OPTIONAL,
    -- plastid genetic code
    pgcode  INTEGER OPTIONAL
}
|
|
|
|
|
|
-- Organism modifier: a subtype code, the modifier text (subname), and an
-- optional attribution.
OrgMod ::= SEQUENCE {
    subtype INTEGER {
        strain             (2),
        substrain          (3),
        type               (4),
        subtype            (5),
        variety            (6),
        serotype           (7),
        serogroup          (8),
        serovar            (9),
        cultivar           (10),
        pathovar           (11),
        chemovar           (12),
        biovar             (13),
        biotype            (14),
        group              (15),
        subgroup           (16),
        isolate            (17),
        common             (18),
        acronym            (19),
        -- chromosome dosage of hybrid
        dosage             (20),
        -- natural host of this specimen
        nat-host           (21),
        sub-species        (22),
        specimen-voucher   (23),
        authority          (24),
        forma              (25),
        forma-specialis    (26),
        ecotype            (27),
        synonym            (28),
        anamorph           (29),
        teleomorph         (30),
        breed              (31),
        -- the three gb-* values are used by taxonomy database
        gb-acronym         (32),
        gb-anamorph        (33),
        gb-synonym         (34),
        culture-collection (35),
        bio-material       (36),
        metagenome-source  (37),
        type-material      (38),
        old-lineage        (253),
        -- ASN5: old-name (254) was noted as a pending addition to the
        -- spec; it is now listed here
        old-name           (254),
        other              (255)
    },
    subname VisibleString,
    -- attribution/source of name
    attrib  VisibleString OPTIONAL
}
|
|
|
|
-- Genus / species / subspecies name.
BinomialOrgName ::= SEQUENCE {
    -- required
    genus      VisibleString,
    -- species required if subspecies used
    species    VisibleString OPTIONAL,
    subspecies VisibleString OPTIONAL
}
|
|
|
|
-- Ordered list of OrgName; the first will be used to assign division.
MultiOrgName ::=
    SEQUENCE OF OrgName
|
|
|
|
-- Ordered list of TaxElement; used when we don't know the genus.
PartialOrgName ::=
    SEQUENCE OF TaxElement
|
|
|
|
-- One element of a PartialOrgName: a fixed-level code, an optional
-- free-text level, and the required name.
TaxElement ::= SEQUENCE {
    fixed-level INTEGER {
        -- level must be set in string
        other  (0),
        family (1),
        order  (2),
        class  (3)
    },
    level VisibleString OPTIONAL,
    name  VisibleString
}
|
|
|
|
END
|
|
|
|
|
|
--**********************************************************************
|
|
--
|
|
-- NCBI BioSource
|
|
-- by James Ostell, 1994
|
|
-- version 3.0
|
|
--
|
|
--**********************************************************************
|
|
|
|
NCBI-BioSource DEFINITIONS ::=
|
|
BEGIN
|
|
|
|
EXPORTS BioSource, SubSource;
|
|
|
|
IMPORTS Org-ref FROM NCBI-Organism;
|
|
|
|
--********************************************************************
|
|
--
|
|
-- BioSource gives the source of the biological material
|
|
-- for sequences
|
|
--
|
|
--********************************************************************
|
|
|
|
-- Source of the biological material for sequences.  genome and origin
-- both default to unknown; org is the only required member.
BioSource ::= SEQUENCE {
    -- biological context
    genome INTEGER {
        unknown                  (0),
        genomic                  (1),
        chloroplast              (2),
        chromoplast              (3),
        kinetoplast              (4),
        mitochondrion            (5),
        plastid                  (6),
        macronuclear             (7),
        extrachrom               (8),
        plasmid                  (9),
        transposon               (10),
        insertion-seq            (11),
        cyanelle                 (12),
        proviral                 (13),
        virion                   (14),
        nucleomorph              (15),
        apicoplast               (16),
        leucoplast               (17),
        proplastid               (18),
        endogenous-virus         (19),
        hydrogenosome            (20),
        chromosome               (21),
        chromatophore            (22),
        plasmid-in-mitochondrion (23),
        plasmid-in-plastid       (24)
    } DEFAULT unknown,
    origin INTEGER {
        unknown    (0),
        -- normal biological entity
        natural    (1),
        -- naturally occurring mutant
        natmut     (2),
        -- artificially mutagenized
        mut        (3),
        -- artificially engineered
        artificial (4),
        -- purely synthetic
        synthetic  (5),
        other      (255)
    } DEFAULT unknown,
    org         Org-ref,
    subtype     SEQUENCE OF SubSource OPTIONAL,
    -- to distinguish biological focus
    is-focus    NULL OPTIONAL,
    pcr-primers PCRReactionSet OPTIONAL
}
|
|
|
|
-- Unordered collection of PCRReaction entries.
PCRReactionSet ::=
    SET OF PCRReaction
|
|
|
|
-- One PCR reaction: optional forward and reverse primer sets.
PCRReaction ::= SEQUENCE {
    forward PCRPrimerSet OPTIONAL,
    reverse PCRPrimerSet OPTIONAL
}
|
|
|
|
-- Unordered collection of PCRPrimer entries.
PCRPrimerSet ::=
    SET OF PCRPrimer
|
|
|
|
-- One PCR primer: optional sequence and optional name.
PCRPrimer ::= SEQUENCE {
    seq  PCRPrimerSeq OPTIONAL,
    name PCRPrimerName OPTIONAL
}
|
|
|
|
-- Primer sequence, carried as a VisibleString.
PCRPrimerSeq ::=
    VisibleString
|
|
|
|
-- Primer name, carried as a VisibleString.
PCRPrimerName ::=
    VisibleString
|
|
|
|
-- Source modifier used by BioSource.subtype: a subtype code, the value
-- text (name), and an optional attribution.
SubSource ::= SEQUENCE {
    subtype INTEGER {
        chromosome            (1),
        map                   (2),
        clone                 (3),
        subclone              (4),
        haplotype             (5),
        genotype              (6),
        sex                   (7),
        cell-line             (8),
        cell-type             (9),
        tissue-type           (10),
        clone-lib             (11),
        dev-stage             (12),
        frequency             (13),
        germline              (14),
        rearranged            (15),
        lab-host              (16),
        pop-variant           (17),
        tissue-lib            (18),
        plasmid-name          (19),
        transposon-name       (20),
        insertion-seq-name    (21),
        plastid-name          (22),
        country               (23),
        segment               (24),
        endogenous-virus-name (25),
        transgenic            (26),
        environmental-sample  (27),
        isolation-source      (28),
        -- +/- decimal degrees
        lat-lon               (29),
        -- DD-MMM-YYYY format
        collection-date       (30),
        -- name of person who collected the sample
        collected-by          (31),
        -- name of person who identified the sample
        identified-by         (32),
        -- sequence (possibly more than one; semicolon-separated)
        fwd-primer-seq        (33),
        -- sequence (possibly more than one; semicolon-separated)
        rev-primer-seq        (34),
        fwd-primer-name       (35),
        rev-primer-name       (36),
        metagenomic           (37),
        mating-type           (38),
        linkage-group         (39),
        haplogroup            (40),
        whole-replicon        (41),
        phenotype             (42),
        altitude              (43),
        other                 (255)
    },
    name   VisibleString,
    -- attribution/source of this name
    attrib VisibleString OPTIONAL
}
|
|
|
|
END
|
|
|
|
--**********************************************************************
|
|
--
|
|
-- NCBI Protein
|
|
-- by James Ostell, 1990
|
|
-- version 0.8
|
|
--
|
|
--**********************************************************************
|
|
|
|
NCBI-Protein DEFINITIONS ::=
|
|
BEGIN
|
|
|
|
EXPORTS Prot-ref;
|
|
|
|
IMPORTS Dbtag FROM NCBI-General;
|
|
|
|
--*** Prot-ref ***********************************************
|
|
--*
|
|
--* Reference to a protein name
|
|
--*
|
|
|
|
-- Reference to a protein name.  Every member is optional except
-- processed, which defaults to not-set.
Prot-ref ::= SEQUENCE {
    -- protein name
    name     SET OF VisibleString OPTIONAL,
    -- description (instead of name)
    desc     VisibleString OPTIONAL,
    -- E.C. number(s)
    ec       SET OF VisibleString OPTIONAL,
    -- activities
    activity SET OF VisibleString OPTIONAL,
    -- ids in other dbases
    db       SET OF Dbtag OPTIONAL,
    -- processing status
    processed ENUMERATED {
        not-set         (0),
        preprotein      (1),
        mature          (2),
        signal-peptide  (3),
        transit-peptide (4),
        propeptide      (5)
    } DEFAULT not-set
}
|
|
|
|
END
|
|
--********************************************************************
|
|
--
|
|
-- Transcription Initiation Site Feature Data Block
|
|
-- James Ostell, 1991
|
|
-- Philip Bucher, David Ghosh
|
|
-- version 1.1
|
|
--
|
|
--
|
|
--
|
|
--********************************************************************
|
|
|
|
NCBI-TxInit DEFINITIONS ::=
|
|
BEGIN
|
|
|
|
EXPORTS Txinit;
|
|
|
|
IMPORTS Gene-ref FROM NCBI-Gene
|
|
Prot-ref FROM NCBI-Protein
|
|
Org-ref FROM NCBI-Organism;
|
|
|
|
-- Transcription initiation site feature data.  name and txsystem are
-- required; the two BOOLEAN flags default to FALSE; the rest is optional.
Txinit ::= SEQUENCE {
    -- descriptive name of initiation site
    name       VisibleString,
    -- synonyms
    syn        SEQUENCE OF VisibleString OPTIONAL,
    -- gene(s) transcribed
    gene       SEQUENCE OF Gene-ref OPTIONAL,
    -- protein(s) produced
    protein    SEQUENCE OF Prot-ref OPTIONAL,
    -- rna(s) produced
    rna        SEQUENCE OF VisibleString OPTIONAL,
    -- tissue/time of expression
    expression VisibleString OPTIONAL,
    -- transcription apparatus used at this site
    txsystem ENUMERATED {
        unknown   (0),
        -- eukaryotic Pol I
        pol1      (1),
        -- eukaryotic Pol II
        pol2      (2),
        -- eukaryotic Pol III
        pol3      (3),
        bacterial (4),
        viral     (5),
        -- RNA replicase
        rna       (6),
        organelle (7),
        other     (255)
    },
    -- modifiers on txsystem
    txdescr           VisibleString OPTIONAL,
    -- organism supplying transcription apparatus
    txorg             Org-ref OPTIONAL,
    -- mapping precise or approx
    mapping-precise   BOOLEAN DEFAULT FALSE,
    -- does Seq-loc reflect mapping
    location-accurate BOOLEAN DEFAULT FALSE,
    inittype ENUMERATED {
        unknown  (0),
        single   (1),
        multiple (2),
        region   (3)
    } OPTIONAL,
    evidence SET OF Tx-evidence OPTIONAL
}
|
|
|
|
-- One piece of evidence for a Txinit: the experiment code, the expression
-- system (defaults to physiological), and two flags that default to FALSE.
Tx-evidence ::= SEQUENCE {
    exp-code ENUMERATED {
        unknown    (0),
        -- direct RNA sequencing
        rna-seq    (1),
        -- RNA length measurement
        rna-size   (2),
        -- nuclease protection mapping with homologous sequence ladder
        np-map     (3),
        -- nuclease protected fragment length measurement
        np-size    (4),
        -- dideoxy RNA sequencing
        pe-seq     (5),
        -- full-length cDNA sequencing
        cDNA-seq   (6),
        -- primer extension mapping with homologous sequence ladder
        pe-map     (7),
        -- primer extension product length measurement
        pe-size    (8),
        -- full-length processed pseudogene sequencing
        pseudo-seq (9),
        -- see NOTE (1) below
        rev-pe-map (10),
        other      (255)
    },
    expression-system ENUMERATED {
        unknown       (0),
        physiological (1),
        in-vitro      (2),
        oocyte        (3),
        transfection  (4),
        transgenic    (5),
        other         (255)
    } DEFAULT physiological,
    low-prec-data BOOLEAN DEFAULT FALSE,
    -- experiment actually done on close homolog
    from-homolog  BOOLEAN DEFAULT FALSE
}
|
|
|
|
-- NOTE (1) length measurement of a reverse direction primer-extension
|
|
-- product (blocked by RNA 5'end) by comparison with
|
|
-- homologous sequence ladder (J. Mol. Biol. 199, 587)
|
|
|
|
END
|
|
|