/* $Id: Seq_id.cpp 493035 2016-02-23 18:54:09Z gotvyans $
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Author:  .......
 *
 * File Description:
 *   .......
 *
 * Remark:
 *   This code was originally generated by application DATATOOL
 *   using specifications from the ASN data definition file
 *   'seqloc.asn'.
 */

// standard includes

// generated includes
#include <ncbi_pch.hpp>
#include <corelib/ncbiutil.hpp>
#include <corelib/ncbiapp.hpp>
#include <util/line_reader.hpp>
#include <util/static_map.hpp>
#include <util/util_misc.hpp>
#include <serial/serialimpl.hpp>

#include <objects/seq/Bioseq.hpp>
#include <objects/seq/Seq_inst.hpp>
#include <objects/seq/seq_id_handle.hpp>

#include <objects/seqloc/Seq_id.hpp>
#include <objects/seqloc/Textseq_id.hpp>
#include <objects/seqloc/Giimport_id.hpp>
#include <objects/seqloc/Patent_seq_id.hpp>
#include <objects/seqloc/PDB_seq_id.hpp>

#include <objects/biblio/Id_pat.hpp>

#include <objects/general/Object_id.hpp>
#include <objects/general/Dbtag.hpp>
#include <objects/general/Date.hpp>
#include <objects/general/Date_std.hpp>
#include <objects/misc/error_codes.hpp>

#include "accguide.inc"


#define NCBI_USE_ERRCODE_X   Objects_SeqId
namespace
{
/// Character predicate that flags unacceptable characters.
///
/// operator() yields true when the character's value lies outside the
/// range 32..127, or when the character occurs in the NUL-terminated
/// string kSymbols; otherwise it yields false.
struct CSeq_id_find_pred
{
    const char* kSymbols; ///< additional characters to flag

    bool operator()(const char ch)
    {
        if (ch >= 32  &&  ch <= 127) {
            // in range: flagged only if explicitly listed
            return strchr(kSymbols, ch) != 0;
        }
        return true; // out of range (non-ASCII is not allowed)
    }
};

}


// generated classes

BEGIN_NCBI_SCOPE
BEGIN_objects_SCOPE // namespace ncbi::objects::


// CSeqIdException
/// Translate the exception's error code into its symbolic name; codes not
/// declared by CSeqIdException are delegated to CException.
const char* CSeqIdException::GetErrCodeString(void) const
{
    const char* code_name = 0;
    switch (GetErrCode()) {
    case eUnknownType:
        code_name = "eUnknownType";
        break;
    case eFormat:
        code_name = "eFormat";
        break;
    default:
        code_name = CException::GetErrCodeString();
        break;
    }
    return code_name;
}


// constructor -- no work of its own beyond base/member construction
CSeq_id::CSeq_id(void)
{
}

// destructor -- no work of its own beyond base/member destruction
CSeq_id::~CSeq_id(void)
{
}


static void s_SplitVersion(const CTempString& acc_in, CTempString& acc,
                           int& ver)
{
    CTempString verstr;
    NStr::SplitInTwo(acc_in, ".", acc, verstr);
    if (verstr.empty()) {
        ver = 0;
    } else {
        ver = NStr::StringToNonNegativeInt(verstr);
        if (ver <= 0) {
            NCBI_THROW(CSeqIdException, eFormat,
                       "Version embedded in accession " + string(acc_in)
                       + " is not a positive integer");
        }
    }
}


/// Return the CTextseq_id payload of this Seq-id when the current variant
/// is one of those handled below, or a null pointer for any other variant.
const CTextseq_id* CSeq_id::GetTextseq_Id(void) const
{
    const CTextseq_id* text_id = 0;
    switch ( Which() ) {
    case e_Genbank:            text_id = &GetGenbank();            break;
    case e_Embl:               text_id = &GetEmbl();               break;
    case e_Ddbj:               text_id = &GetDdbj();               break;
    case e_Pir:                text_id = &GetPir();                break;
    case e_Swissprot:          text_id = &GetSwissprot();          break;
    case e_Other:              text_id = &GetOther();              break;
    case e_Prf:                text_id = &GetPrf();                break;
    case e_Tpg:                text_id = &GetTpg();                break;
    case e_Tpe:                text_id = &GetTpe();                break;
    case e_Tpd:                text_id = &GetTpd();                break;
    case e_Gpipe:              text_id = &GetGpipe();              break;
    case e_Named_annot_track:  text_id = &GetNamed_annot_track();  break;
    default:
        break; // not a text-based variant: leave the result null
    }
    return text_id;
}


/// Copy the active variant of one CObject_id into another.
/// An unset source resets the destination; an unrecognized variant raises
/// CSeqIdException (eFormat).
inline
void x_Assign(CObject_id& dst, const CObject_id& src)
{
    const CObject_id::E_Choice variant = src.Which();
    if ( variant == CObject_id::e_Id ) {
        dst.SetId(src.GetId());
    }
    else if ( variant == CObject_id::e_Str ) {
        dst.SetStr(src.GetStr());
    }
    else if ( variant == CObject_id::e_not_set ) {
        dst.Reset();
    }
    else {
        NCBI_THROW(CSeqIdException, eFormat, "invalid Object-id variant");
    }
}


/// Copy a CGiimport_id: the id is always copied, while the db and release
/// fields are copied when set in the source and reset otherwise.
inline
void x_Assign(CGiimport_id& dst, const CGiimport_id& src)
{
    dst.SetId(src.GetId());

    if ( !src.IsSetDb() ) {
        dst.ResetDb();
    }
    else {
        dst.SetDb(src.GetDb());
    }

    if ( !src.IsSetRelease() ) {
        dst.ResetRelease();
    }
    else {
        dst.SetRelease(src.GetRelease());
    }
}


/// Copy a CTextseq_id field by field (name, accession, release, version).
/// Each field is copied when set in the source and reset otherwise.
inline
void x_Assign(CTextseq_id& dst, const CTextseq_id& src)
{
    if ( !src.IsSetName() ) {
        dst.ResetName();
    }
    else {
        dst.SetName(src.GetName());
    }

    if ( !src.IsSetAccession() ) {
        dst.ResetAccession();
    }
    else {
        dst.SetAccession(src.GetAccession());
    }

    if ( !src.IsSetRelease() ) {
        dst.ResetRelease();
    }
    else {
        dst.SetRelease(src.GetRelease());
    }

    if ( !src.IsSetVersion() ) {
        dst.ResetVersion();
    }
    else {
        dst.SetVersion(src.GetVersion());
    }
}


/// Copy a CDbtag: the db string is copied directly and the tag is copied
/// through the CObject_id overload of x_Assign.
inline
void x_Assign(CDbtag& dst, const CDbtag& src)
{
    dst.SetDb(src.GetDb());

    const CObject_id& src_tag = src.GetTag();
    x_Assign(dst.SetTag(), src_tag);
}


/// Copy a CPatent_seq_id: the seqid value is copied directly and the
/// citation is copied via its own Assign().
inline
void x_Assign(CPatent_seq_id& dst, const CPatent_seq_id& src)
{
    dst.SetSeqid(src.GetSeqid());

    const CPatent_seq_id::TCit& src_cit = src.GetCit();
    dst.SetCit().Assign(src_cit);
}


/// Copy a CDate by delegating to the destination's own Assign() method.
inline
void x_Assign(CDate& dst, const CDate& src)
{
    dst.Assign(src);
}


/// Copy a CPDB_seq_id: mol is always copied, while chain and rel are
/// copied when set in the source and reset otherwise.
inline
void x_Assign(CPDB_seq_id& dst, const CPDB_seq_id& src)
{
    dst.SetMol().Set(src.GetMol());

    if ( !src.IsSetChain() ) {
        dst.ResetChain();
    }
    else {
        dst.SetChain(src.GetChain());
    }

    if ( !src.IsSetRel() ) {
        dst.ResetRel();
    }
    else {
        dst.SetRel().Assign(src.GetRel());
    }
}


void CSeq_id::Assign(const CSerialObject& obj, ESerialRecursionMode how)
{
    if ( GetTypeInfo() == obj.GetThisTypeInfo() ) {
        const CSeq_id& id = static_cast<const CSeq_id&>(obj);
        switch ( id.Which() ) {
        case e_not_set:
            Reset();
            return;
        case e_Local:
            x_Assign(SetLocal(), id.GetLocal());
            return;
        case e_Gibbsq:
            SetGibbsq(id.GetGibbsq());
            return;
        case e_Gibbmt:
            SetGibbmt(id.GetGibbmt());
            return;
        case e_Giim:
            x_Assign(SetGiim(), id.GetGiim());
            return;
        case e_Pir:
            x_Assign(SetPir(), id.GetPir());
            return;
        case e_Swissprot:
            x_Assign(SetSwissprot(), id.GetSwissprot());
            return;
        case e_Patent:
            x_Assign(SetPatent(), id.GetPatent());
            return;
        case e_Other:
            x_Assign(SetOther(), id.GetOther());
            return;
        case e_General:
            x_Assign(SetGeneral(), id.GetGeneral());
            return;
        case e_Gi:
            SetGi(id.GetGi());
            return;
        case e_Prf:
            x_Assign(SetPrf(), id.GetPrf());
            return;
        case e_Pdb:
            x_Assign(SetPdb(), id.GetPdb());
            return;
        case e_Genbank:
            x_Assign(SetGenbank(), id.GetGenbank());
            return;
        case e_Embl:
            x_Assign(SetEmbl(), id.GetEmbl());
            return;
        case e_Ddbj:
            x_Assign(SetDdbj(), id.GetDdbj());
            return;
        case e_Tpg:
            x_Assign(SetTpg(), id.GetTpg());
            return;
        case e_Tpe:
            x_Assign(SetTpe(), id.GetTpe());
            return;
        case e_Tpd:
            x_Assign(SetTpd(), id.GetTpd());
            return;
        case e_Gpipe:
            x_Assign(SetGpipe(), id.GetGpipe());
            return;
        case e_Named_annot_track:
            x_Assign(SetNamed_annot_track(), id.GetNamed_annot_track());
            return;
        }
    }
    CSerialObject::Assign(obj, how);
}


/// Tell whether the given Seq-id variant is one of those for which
/// CSeq_id::Compare() attempts a cross-variant Textseq-id match.
inline bool CanCmpAcc(CSeq_id::E_Choice choice)
{
    return choice == CSeq_id::e_Genbank
        || choice == CSeq_id::e_Embl
        || choice == CSeq_id::e_Ddbj
        || choice == CSeq_id::e_Tpg
        || choice == CSeq_id::e_Tpe
        || choice == CSeq_id::e_Tpd
        || choice == CSeq_id::e_Gpipe
        || choice == CSeq_id::e_Named_annot_track;
}


// Compare() - are SeqIds equivalent?
//
// Returns e_YES / e_NO when the two ids are comparable, e_DIFF when they
// are not (different variants that cannot be cross-matched, or general
// ids whose db names differ), and e_error for a variant not handled here.
CSeq_id::E_SIC CSeq_id::Compare(const CSeq_id& sid2) const
{
    const E_Choice mine   = Which();
    const E_Choice theirs = sid2.Which();

    if ( mine != theirs ) {
        // Differing variants are only comparable via their Textseq-ids.
        if ( !(CanCmpAcc(mine)  &&  CanCmpAcc(theirs)) ) {
            return e_DIFF;
        }
        const CTextseq_id* lhs = GetTextseq_Id();
        if ( !lhs ) {
            return e_DIFF;
        }
        const CTextseq_id* rhs = sid2.GetTextseq_Id();
        if ( !rhs ) {
            return e_DIFF;
        }
        return lhs->Match(*rhs) ? e_YES : e_NO;
    }

    // Same variant on both sides from here on.
    bool same = false;
    switch ( mine ) {
    case e_Local:
        same = GetLocal().Match(sid2.GetLocal());
        break;
    case e_Gibbsq:
        same = (GetGibbsq() == sid2.GetGibbsq());
        break;
    case e_Gibbmt:
        same = (GetGibbmt() == sid2.GetGibbmt());
        break;
    case e_Giim:
        same = (GetGiim().GetId() == sid2.GetGiim().GetId());
        break;
    case e_Pir:
        same = GetPir().Match(sid2.GetPir());
        break;
    case e_Swissprot:
        same = GetSwissprot().Match(sid2.GetSwissprot());
        break;
    case e_Patent:
        same = GetPatent().Match(sid2.GetPatent());
        break;
    case e_Other:
        same = GetOther().Match(sid2.GetOther());
        break;
    case e_General:
    {
        const CDbtag& lhs = GetGeneral();
        const CDbtag& rhs = sid2.GetGeneral();
        if ( lhs.Match(rhs) ) {
            return e_YES;
        }
        // Different databases are not comparable at all.
        return NStr::CompareNocase(lhs.GetDb(), rhs.GetDb())
            ? e_DIFF : e_NO;
    }
    case e_Gi:
        same = (GetGi() == sid2.GetGi());
        break;
    case e_Prf:
        same = GetPrf().Match(sid2.GetPrf());
        break;
    case e_Pdb:
        same = GetPdb().Match(sid2.GetPdb());
        break;
    case e_Genbank:
        same = GetGenbank().Match(sid2.GetGenbank());
        break;
    case e_Embl:
        same = GetEmbl().Match(sid2.GetEmbl());
        break;
    case e_Ddbj:
        same = GetDdbj().Match(sid2.GetDdbj());
        break;
    case e_Tpg:
        same = GetTpg().Match(sid2.GetTpg());
        break;
    case e_Tpe:
        same = GetTpe().Match(sid2.GetTpe());
        break;
    case e_Tpd:
        same = GetTpd().Match(sid2.GetTpd());
        break;
    case e_Gpipe:
        same = GetGpipe().Match(sid2.GetGpipe());
        break;
    case e_Named_annot_track:
        same = GetNamed_annot_track().Match(sid2.GetNamed_annot_track());
        break;
    default:
        return e_error;
    }
    return same ? e_YES : e_NO;
}


/// Ordering comparison of two Seq-ids.
///
/// Ids of different variants are ordered by the numeric difference of
/// their variant codes.  Ids of the same variant are ordered by their
/// Textseq-id when both have one, otherwise by the variant-specific
/// comparison below; variants without one compare as equal (0).
TIntId CSeq_id::CompareOrdered(const CSeq_id& sid2) const
{
    const int choice_diff = Which() - sid2.Which();
    if ( choice_diff != 0 ) {
        return choice_diff;
    }

    const CTextseq_id* lhs_text = GetTextseq_Id();
    const CTextseq_id* rhs_text = sid2.GetTextseq_Id();
    if ( lhs_text != 0  &&  rhs_text != 0 ) {
        return lhs_text->Compare(*rhs_text);
    }

    // Non-text variants; both ids hold the same one at this point.
    switch ( Which() ) {
    case e_Gibbsq:
        return GetGibbsq() - sid2.GetGibbsq();
    case e_Gibbmt:
        return GetGibbmt() - sid2.GetGibbmt();
    case e_Giim:
        return GetGiim().GetId() - sid2.GetGiim().GetId();
    case e_Gi:
        return GetGi() - sid2.GetGi();
    case e_Local:
        return GetLocal().Compare(sid2.GetLocal());
    case e_Patent:
        return GetPatent().Compare(sid2.GetPatent());
    case e_General:
        return GetGeneral().Compare(sid2.GetGeneral());
    case e_Pdb:
        return GetPdb().Compare(sid2.GetPdb());
    default:
        return 0;
    }
}

// Table mapping textual Seq-id type tags (short FASTA-style tags as well
// as longer names) to CSeq_id::E_Choice values.  Several tags may map to
// the same choice.  The map built from it compares keys with
// PNocase_Generic, i.e. case-insensitively.
typedef SStaticPair<const char*, CSeq_id::E_Choice> TChoiceMapEntry;
// used for binary searching; must be in order.
static const TChoiceMapEntry sc_ChoiceArray[] = {
    { "???",          CSeq_id::e_not_set },
    { "bbm",          CSeq_id::e_Gibbmt },
    { "bbs",          CSeq_id::e_Gibbsq },
    { "dbj",          CSeq_id::e_Ddbj },
    { "ddbj",         CSeq_id::e_Ddbj },
    { "emb",          CSeq_id::e_Embl },
    { "embl",         CSeq_id::e_Embl },
    { "gb",           CSeq_id::e_Genbank },
    { "genbank",      CSeq_id::e_Genbank },
    { "general",      CSeq_id::e_General },
    { "gi",           CSeq_id::e_Gi },
    { "gibbmt",       CSeq_id::e_Gibbmt },
    { "gibbsq",       CSeq_id::e_Gibbsq },
    { "giim",         CSeq_id::e_Giim },
    { "gim",          CSeq_id::e_Giim },
    { "gnl",          CSeq_id::e_General },
    { "gpipe",        CSeq_id::e_Gpipe },
    { "gpp",          CSeq_id::e_Gpipe },
    { "lcl",          CSeq_id::e_Local },
    { "local",        CSeq_id::e_Local },
    { "named_annot_track", CSeq_id::e_Named_annot_track },
    { "nat",          CSeq_id::e_Named_annot_track },
    { "not_set",      CSeq_id::e_not_set },
    { "oth",          CSeq_id::e_Other }, // deprecated vs. ref
    { "other",        CSeq_id::e_Other },
    { "pat",          CSeq_id::e_Patent },
    { "patent",       CSeq_id::e_Patent },
    { "pdb",          CSeq_id::e_Pdb },
    { "pgp",          CSeq_id::e_Patent },
    { "pir",          CSeq_id::e_Pir },
    { "prf",          CSeq_id::e_Prf },
    { "ref",          CSeq_id::e_Other },
    { "sp",           CSeq_id::e_Swissprot },
    { "swissprot",    CSeq_id::e_Swissprot },
    { "tpd",          CSeq_id::e_Tpd },
    { "tpe",          CSeq_id::e_Tpe },
    { "tpg",          CSeq_id::e_Tpg },
    { "tr",           CSeq_id::e_Swissprot }
};
// Case-insensitive lookup map over sc_ChoiceArray (see WhichInverseSeqId).
typedef CStaticPairArrayMap<CTempString, CSeq_id::E_Choice,
                            PNocase_Generic<CTempString> > TChoiceMap;
DEFINE_STATIC_ARRAY_MAP_WITH_COPY(TChoiceMap, sc_ChoiceMap, sc_ChoiceArray);


// Short tag strings, one slot per entry up to CSeq_id::e_MaxChoice plus a
// trailing empty placeholder (the array has e_MaxChoice + 1 elements).
// NOTE(review): entries are presumably ordered to match the numeric
// values of CSeq_id::E_Choice -- confirm against the enum before
// inserting or reordering anything.
static const char* const s_TextId[CSeq_id::e_MaxChoice+1] =
{   // FASTA_LONG formats
    "???" , // not-set = ???
    "lcl",  // local = lcl|integer or string
    "bbs",  // gibbsq = bbs|integer
    "bbm",  // gibbmt = bbm|integer
    "gim",  // giim = gim|integer
    "gb",   // genbank = gb|accession|locus
    "emb",  // embl = emb|accession|locus
    "pir",  // pir = pir|accession|name
    "sp",   // swissprot = sp|accession|name *OR* tr|accession|name
    "pat",  // patent = pat|country|patent number (string)|seq number (integer)
            //     *OR* pgp|country|application number|seq number
    "ref",  // other = ref|accession|name|release - changed from oth to ref
    "gnl",  // general = gnl|database(string)|id (string or number)
    "gi",   // gi = gi|integer
    "dbj",  // ddbj = dbj|accession|locus
    "prf",  // prf = prf|accession|name
    "pdb",  // pdb = pdb|entry name (string)|chain id (char)
    "tpg",  // tpg = tpg|accession|name
    "tpe",  // tpe = tpe|accession|name
    "tpd",  // tpd = tpd|accession|name
    "gpp",  // gpipe = gpp|accession|name
    "nat",  // named_annot_track = nat|accession|name
    ""  // Placeholder for end of list
};

/// Look up the Seq-id variant for a textual type tag (case-insensitive,
/// via sc_ChoiceMap).  Unknown tags yield e_not_set.
CSeq_id::E_Choice CSeq_id::WhichInverseSeqId(const CTempString& SeqIdCode)
{
    TChoiceMap::const_iterator found = sc_ChoiceMap.find(SeqIdCode);
    return (found == sc_ChoiceMap.end()) ? e_not_set : found->second;
}

static inline bool s_HasFastaTag(const CTempString& s)
{
    // > rather than >= because there should be content after the bar.
    if (s.size() > 3  &&  s[2] == '|') {
        return true;
    } else if (s.size() > 4  &&  s[3] == '|') {
        return true;
    } else {
        return false;
    }
}



/// Identify the Seq-id variant named by a leading two- or three-character
/// tag that is followed by '|' and further content.  Returns e_not_set
/// when no such tag is present or the tag is not recognized.
static CSeq_id::E_Choice s_CheckForFastaTag(const CTempString& s)
{
    // Strictly-greater length checks: there should be content after the bar.
    SIZE_TYPE tag_len = 0;
    if (s.size() > 3  &&  s[2] == '|') {
        tag_len = 2;
    } else if (s.size() > 4  &&  s[3] == '|') {
        tag_len = 3;
    }

    if (tag_len == 0) {
        return CSeq_id::e_not_set;
    }
    return CSeq_id::WhichInverseSeqId(s.substr(0, tag_len));
}


// Table mapping accession-type names, as they appear in the third column
// of accession-guide rules (see SAccGuide::AddRule), to
// CSeq_id::EAccessionInfo values.  The map built from it compares keys
// with PNocase_Generic, i.e. case-insensitively.
typedef SStaticPair<const char*, CSeq_id::EAccessionInfo> TAccInfoMapEntry;
// used for binary searching; must be in order.
static const TAccInfoMapEntry sc_AccInfoArray[] = {
    { "ambiguous_nuc",           CSeq_id::eAcc_ambiguous_nuc },
    { "ddbj_con",                CSeq_id::eAcc_ddbj_con },
    { "ddbj_dirsub",             CSeq_id::eAcc_ddbj_dirsub },
    { "ddbj_est",                CSeq_id::eAcc_ddbj_est },
    { "ddbj_genome",             CSeq_id::eAcc_ddbj_genome },
    { "ddbj_gss",                CSeq_id::eAcc_ddbj_gss },
    { "ddbj_htgs",               CSeq_id::eAcc_ddbj_htgs },
    { "ddbj_mga",                CSeq_id::eAcc_ddbj_mga },
    { "ddbj_mrna",               CSeq_id::eAcc_ddbj_mrna },
    { "ddbj_other_nuc",          CSeq_id::eAcc_ddbj_other_nuc },
    { "ddbj_patent",             CSeq_id::eAcc_ddbj_patent },
    { "ddbj_prot",               CSeq_id::eAcc_ddbj_prot },
    { "ddbj_tpa_chromosome",     CSeq_id::eAcc_ddbj_tpa_chromosome },
    { "ddbj_tpa_con",            CSeq_id::eAcc_ddbj_tpa_con },
    { "ddbj_tpa_nuc",            CSeq_id::eAcc_ddbj_tpa_nuc },
    { "ddbj_tpa_prot",           CSeq_id::eAcc_ddbj_tpa_prot },
    { "ddbj_tpa_wgs_nuc",        CSeq_id::eAcc_ddbj_tpa_wgs_nuc },
    { "ddbj_tpa_wgs_prot",       CSeq_id::eAcc_ddbj_tpa_wgs_prot },
    { "ddbj_tpa_wgsm_nuc",       CSeq_id::eAcc_ddbj_tpa_wgsm_nuc },
    { "ddbj_tpa_wgsm_prot",      CSeq_id::eAcc_ddbj_tpa_wgsm_prot },
    { "ddbj_tsa_nuc",            CSeq_id::eAcc_ddbj_tsa_nuc },
    { "ddbj_tsa_prot",           CSeq_id::eAcc_ddbj_tsa_prot },
    { "ddbj_wgs_nuc",            CSeq_id::eAcc_ddbj_wgs_nuc },
    { "ddbj_wgs_prot",           CSeq_id::eAcc_ddbj_wgs_prot },
    { "ddbj_wgsm_nuc",           CSeq_id::eAcc_ddbj_wgsm_nuc },
    { "ddbj_wgsm_prot",          CSeq_id::eAcc_ddbj_wgsm_prot },
    { "embl_con",                CSeq_id::eAcc_embl_con },
    { "embl_ddbj",               CSeq_id::eAcc_embl_ddbj },
    { "embl_dirsub",             CSeq_id::eAcc_embl_dirsub },
    { "embl_est",                CSeq_id::eAcc_embl_est },
    { "embl_genome",             CSeq_id::eAcc_embl_genome },
    { "embl_htgs",               CSeq_id::eAcc_embl_htgs },
    { "embl_mga",                CSeq_id::eAcc_embl_mga },
    { "embl_other_nuc",          CSeq_id::eAcc_embl_other_nuc },
    { "embl_patent",             CSeq_id::eAcc_embl_patent },
    { "embl_prot",               CSeq_id::eAcc_embl_prot },
    { "embl_tpa_nuc",            CSeq_id::eAcc_embl_tpa_nuc },
    { "embl_tpa_prot",           CSeq_id::eAcc_embl_tpa_prot },
    { "embl_tpa_wgs_nuc",        CSeq_id::eAcc_embl_tpa_wgs_nuc },
    { "embl_tpa_wgs_prot",       CSeq_id::eAcc_embl_tpa_wgs_prot },
    { "embl_tpa_wgsm_nuc",       CSeq_id::eAcc_embl_tpa_wgsm_nuc },
    { "embl_tpa_wgsm_prot",      CSeq_id::eAcc_embl_tpa_wgsm_prot },
    { "embl_tsa_nuc",            CSeq_id::eAcc_embl_tsa_nuc },
    { "embl_tsa_prot",           CSeq_id::eAcc_embl_tsa_prot },
    { "embl_wgs_nuc",            CSeq_id::eAcc_embl_wgs_nuc },
    { "embl_wgs_prot",           CSeq_id::eAcc_embl_wgs_prot },
    { "embl_wgsm_nuc",           CSeq_id::eAcc_embl_wgsm_nuc },
    { "embl_wgsm_prot",          CSeq_id::eAcc_embl_wgsm_prot },
    { "gb_backbone",             CSeq_id::eAcc_gb_backbone },
    { "gb_cdna",                 CSeq_id::eAcc_gb_cdna },
    { "gb_con",                  CSeq_id::eAcc_gb_con },
    { "gb_ddbj",                 CSeq_id::eAcc_gb_ddbj },
    { "gb_dirsub",               CSeq_id::eAcc_gb_dirsub },
    { "gb_embl",                 CSeq_id::eAcc_gb_embl },
    { "gb_embl_ddbj",            CSeq_id::eAcc_gb_embl_ddbj },
    { "gb_est",                  CSeq_id::eAcc_gb_est },
    { "gb_genome",               CSeq_id::eAcc_gb_genome },
    { "gb_gsdb",                 CSeq_id::eAcc_gb_gsdb },
    { "gb_gss",                  CSeq_id::eAcc_gb_gss },
    { "gb_htgs",                 CSeq_id::eAcc_gb_htgs },
    { "gb_mga",                  CSeq_id::eAcc_gb_mga },
    { "gb_optical_map",          CSeq_id::eAcc_gb_optical_map },
    { "gb_other_nuc",            CSeq_id::eAcc_gb_other_nuc },
    { "gb_patent",               CSeq_id::eAcc_gb_patent },
    { "gb_patent_prot",          CSeq_id::eAcc_gb_patent_prot },
    { "gb_prot",                 CSeq_id::eAcc_gb_prot },
    { "gb_segset",               CSeq_id::eAcc_gb_segset },
    { "gb_sts",                  CSeq_id::eAcc_gb_sts },
    { "gb_targeted_nuc",         CSeq_id::eAcc_gb_targeted_nuc },
    { "gb_tpa_chromosome",       CSeq_id::eAcc_gb_tpa_chromosome },
    { "gb_tpa_con",              CSeq_id::eAcc_gb_tpa_con },
    { "gb_tpa_nuc",              CSeq_id::eAcc_gb_tpa_nuc },
    { "gb_tpa_prot",             CSeq_id::eAcc_gb_tpa_prot },
    { "gb_tpa_wgs_nuc",          CSeq_id::eAcc_gb_tpa_wgs_nuc },
    { "gb_tpa_wgs_prot",         CSeq_id::eAcc_gb_tpa_wgs_prot },
    { "gb_tpa_wgsm_nuc",         CSeq_id::eAcc_gb_tpa_wgsm_nuc },
    { "gb_tpa_wgsm_prot",        CSeq_id::eAcc_gb_tpa_wgsm_prot },
    { "gb_tsa_nuc",              CSeq_id::eAcc_gb_tsa_nuc },
    { "gb_tsa_prot",             CSeq_id::eAcc_gb_tsa_prot },
    { "gb_tsam_nuc",             CSeq_id::eAcc_gb_tsam_nuc },
    { "gb_tsam_prot",            CSeq_id::eAcc_gb_tsam_prot },
    { "gb_wgs_nuc",              CSeq_id::eAcc_gb_wgs_nuc },
    { "gb_wgs_prot",             CSeq_id::eAcc_gb_wgs_prot },
    { "gb_wgsm_nuc",             CSeq_id::eAcc_gb_wgsm_nuc },
    { "gb_wgsm_prot",            CSeq_id::eAcc_gb_wgsm_prot },
    { "general",                 CSeq_id::eAcc_general },
    { "general_nuc",             CSeq_id::eAcc_general_nuc },
    { "general_prot",            CSeq_id::eAcc_general_prot },
    { "gi",                      CSeq_id::eAcc_gi },
    { "gibbmt",                  CSeq_id::eAcc_gibbmt },
    { "gibbsq",                  CSeq_id::eAcc_gibbsq },
    { "giim",                    CSeq_id::eAcc_giim },
    { "gpipe_chromosome",        CSeq_id::eAcc_gpipe_chromosome },
    { "gpipe_genomic",           CSeq_id::eAcc_gpipe_genomic },
    { "gpipe_mrna",              CSeq_id::eAcc_gpipe_mrna },
    { "gpipe_ncrna",             CSeq_id::eAcc_gpipe_ncrna },
    { "gpipe_other_nuc",         CSeq_id::eAcc_gpipe_other_nuc },
    { "gpipe_prot",              CSeq_id::eAcc_gpipe_prot },
    { "gpipe_scaffold",          CSeq_id::eAcc_gpipe_scaffold },
    { "gpipe_unreserved",        CSeq_id::eAcc_gpipe_unreserved },
    { "gsdb_dirsub",             CSeq_id::eAcc_gsdb_dirsub },
    { "local",                   CSeq_id::eAcc_local },
    { "maybe_ddbj",              CSeq_id::eAcc_maybe_ddbj },
    { "maybe_embl",              CSeq_id::eAcc_maybe_embl },
    { "maybe_gb",                CSeq_id::eAcc_maybe_gb },
    { "named_annot_track",       CSeq_id::eAcc_named_annot_track },
    { "patent",                  CSeq_id::eAcc_patent },
    { "pdb",                     CSeq_id::eAcc_pdb },
    { "pir",                     CSeq_id::eAcc_pir },
    { "prf",                     CSeq_id::eAcc_prf },
    { "refseq_chromosome",       CSeq_id::eAcc_refseq_chromosome },
    { "refseq_chromosome_ncbo",  CSeq_id::eAcc_refseq_chromosome_ncbo },
    { "refseq_contig",           CSeq_id::eAcc_refseq_contig },
    { "refseq_contig_ncbo",      CSeq_id::eAcc_refseq_contig_ncbo },
    { "refseq_genome",           CSeq_id::eAcc_refseq_genome },
    { "refseq_genomic",          CSeq_id::eAcc_refseq_genomic },
    { "refseq_mrna",             CSeq_id::eAcc_refseq_mrna },
    { "refseq_mrna_predicted",   CSeq_id::eAcc_refseq_mrna_predicted },
    { "refseq_ncrna",            CSeq_id::eAcc_refseq_ncrna },
    { "refseq_ncrna_predicted",  CSeq_id::eAcc_refseq_ncrna_predicted },
    { "refseq_prot",             CSeq_id::eAcc_refseq_prot },
    { "refseq_prot_predicted",   CSeq_id::eAcc_refseq_prot_predicted },
    { "refseq_unique_prot",      CSeq_id::eAcc_refseq_unique_prot },
    { "refseq_unreserved",       CSeq_id::eAcc_refseq_unreserved },
    { "refseq_wgs_intermed",     CSeq_id::eAcc_refseq_wgs_intermed },
    { "refseq_wgs_nuc",          CSeq_id::eAcc_refseq_wgs_nuc },
    { "refseq_wgs_prot",         CSeq_id::eAcc_refseq_wgs_prot },
    { "refseq_wgsm_intermed",    CSeq_id::eAcc_refseq_wgsm_intermed },
    { "refseq_wgsm_nuc",         CSeq_id::eAcc_refseq_wgsm_nuc },
    { "refseq_wgsm_prot",        CSeq_id::eAcc_refseq_wgsm_prot },
    { "swissprot",               CSeq_id::eAcc_swissprot },
    { "unknown",                 CSeq_id::eAcc_unknown },
    { "unreserved_nuc",          CSeq_id::eAcc_unreserved_nuc },
    { "unreserved_prot",         CSeq_id::eAcc_unreserved_prot }
};
// Case-insensitive lookup map over sc_AccInfoArray.
typedef CStaticPairArrayMap<CTempString, CSeq_id::EAccessionInfo,
                            PNocase_Generic<CTempString> > TAccInfoMap;
DEFINE_STATIC_ARRAY_MAP_WITH_COPY(TAccInfoMap, sc_AccInfoMap, sc_AccInfoArray);

// The decimal digit characters; SAccGuide::AddRule passes this to
// find_first_of() to locate the first digit of a "special" accession.
static const char kDigits[] = "0123456789";

/// In-memory set of accession classification rules.
///
/// Rules are added one text line at a time with AddRule() and queried
/// with Find().  They are grouped by a "format code" that packs a letter
/// count and a digit count into one integer (see s_Key()).
struct SAccGuide : public CObject
{
    typedef CSeq_id::EAccessionInfo TAccInfo;
    typedef map<string, TAccInfo>   TPrefixes;
    typedef pair<string, TAccInfo>  TPair;
    typedef vector<TPair>           TPairs;
    typedef map<string, TPair>      TSpecialMap; // last -> first -> value
    typedef unsigned int            TFormatCode;

    /// All rules registered for one format code.
    struct SSubMap {
        /// Literal prefixes (no '?' or '*') and their accession info.
        TPrefixes    prefixes;
        /// Prefix masks containing '?' or '*'; Find() tries them in
        /// insertion order and uses the first match.
        TPairs       wildcards;
        /// "special" rules, keyed by the last accession of a range; the
        /// value holds the first accession of the range and the info.
        TSpecialMap  specials;
    };
    typedef map<TFormatCode, SSubMap> TMainMap;

    /// Default constructor (defined out of line).
    SAccGuide(void);
    /// Construct from a file name; delegates to x_Load(filename).
    SAccGuide(const string& filename)
        : count(0)
        { x_Load(filename); }
    /// Construct from a line reader; delegates to x_Load(lr).
    SAccGuide(ILineReader& lr)
        : count(0)
        { x_Load(lr); }

    /// Parse one rule line and record it (see definition for the formats).
    void AddRule(const CTempString& rule);
    /// Look up the info for an accession or prefix under a format code.
    /// If key_used is non-null it may receive the rule key that matched.
    TAccInfo Find(TFormatCode fmt, const CTempString& acc_or_pfx,
                  string* key_used = NULL);
    /// Build a format code: letter count in the bits above the low 16,
    /// digit count in the low bits.
    static TFormatCode s_Key(unsigned short letters, unsigned short digits)
        { return TFormatCode(letters) << 16 | digits; }

    /// Number of AddRule() calls so far; used as a line number in
    /// diagnostics.
    unsigned int count;
    /// Per-format-code rules.
    TMainMap     rules;
    /// Rules from "gnl" lines: upper-cased database name -> info.
    TPrefixes    general;

private:
    // Loaders are defined elsewhere in the file.
    void x_Load(const string& filename);
    void x_Load(ILineReader& lr);
    void x_InitGeneral(void);
};

/// Parse one line of an accession guide and record the rule it describes.
///
/// Everything from the first '#' onward is treated as a comment; lines
/// that are empty after comment removal are ignored.  The remaining text
/// is split on spaces and tabs and must take one of these forms:
///
///   version N              -- only N == 1 is accepted
///   L+D PREFIX TYPE [*]    -- L letters + D digits; PREFIX may contain
///                             the wildcards '?' and '*'; a trailing "*"
///                             token marks the rule as having specials
///   special FIRST[-LAST] TYPE
///   gnl DB TYPE            -- DB is stored upper-cased
///
/// TYPE is looked up (case-insensitively) in sc_AccInfoMap.  Problems are
/// reported through ERR_POST_X and the offending line is skipped; nothing
/// is thrown by this function itself.
///
/// @param rule  One line of rule text.  `count` is incremented for every
///              call (including comment and blank lines) and is used as
///              the line number in diagnostics.
void SAccGuide::AddRule(const CTempString& rule)
{
    CTempString         tmp1, tmp2;
    vector<CTempStringEx> tokens;
    SIZE_TYPE           pos, pos2;

    ++count;
    tmp1.assign(rule, 0, rule.find('#')); // strip comment
    if (tmp1.empty())
        return;
    tokens.reserve(3);
    NStr::Split(tmp1, " \t", tokens,
                NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate);
    if (tokens.empty()) {
        return;
    } else if (tokens.size() == 2
               &&  NStr::EqualNocase(tokens[0], "version")) {
        unsigned int version = NStr::StringToUInt(tokens[1],
                                                  NStr::fConvErr_NoThrow);
        if (version != 1) {
            ERR_POST_X(2, "SAccGuide::AddRule: " << count
                          << ": Unsupported version " << tokens[1]);
            return;
        }
    } else if ((pos = tokens[0].find('+')) != NPOS
               &&  (tokens.size() == 3
                    ||  (tokens.size() == 4  &&  tokens[3] == "*"))) {
        // _VERIFY(NStr::SplitInTwo(tokens[0], "+", tmp1, tmp2));
        tmp1.assign(tokens[0], 0, pos);
        tmp2.assign(tokens[0], pos + 1, NPOS);
        TFormatCode fmt
            = s_Key(NStr::StringToUInt(tmp1, NStr::fConvErr_NoThrow),
                    NStr::StringToUInt(tmp2, NStr::fConvErr_NoThrow));
        TAccInfoMap::const_iterator it = sc_AccInfoMap.find(tokens[2]);
        if (it == sc_AccInfoMap.end()) {
            // Unknown type name: report whether an earlier rule already
            // covers this prefix, then drop the line.
            string   key_used;
            TAccInfo old = Find(fmt, tokens[1], &key_used);
            if (old != CSeq_id::eAcc_unknown) {
                if ( !key_used.empty() ) {
                    key_used = " (per " + key_used + ')';
                }
                ERR_POST_X(8, Warning << "SAccGuide::AddRule: " << count
                           << ": ignoring refinement of " << tokens[1]
                           << " from 0x" << hex << old << key_used
                           << " to unrecognized accession type " << tokens[2]);
            } else {
                ERR_POST_X(3, "SAccGuide::AddRule: " << count
                           << ": unrecognized accession type " << tokens[2]
                           << " for " << tokens[1]);
            }
        } else {
            TAccInfo value = it->second;
            if (tokens.size() == 4) {
                // Trailing "*": Find() should also consult the specials.
                value = TAccInfo(value | CSeq_id::fAcc_specials);
            }
            if (tokens[1].find_first_of("?*") == NPOS) {
                rules[fmt].prefixes[tokens[1]] = value;
            } else {
                // Account for possible refinements of fallback definitions
                NON_CONST_ITERATE (TPairs, wit, rules[fmt].wildcards) {
                    if (wit->first == tokens[1]) {
                        wit->second = value;
                        return;
                    }
                }
                rules[fmt].wildcards.push_back(TPair(tokens[1], value));
            }
        }
    } else if (tokens.size() == 3 && NStr::EqualNocase(tokens[0], "special")) {
        pos  = tokens[1].find_first_of(kDigits);
        if (pos == NPOS) {
            // Without any digit there is no letters/digits split, so no
            // meaningful format code can be derived; passing NPOS on to
            // s_Key() would silently register the rule under a junk key.
            ERR_POST_X(5, Warning << "SAccGuide::AddRule: " << count
                          << ": ignoring invalid line: " << rule);
            return;
        }
        pos2 = tokens[1].find('-', pos);
        // Letters = characters before the first digit; digits = the rest
        // of the first accession (up to the '-' if a range was given).
        TFormatCode fmt
            = s_Key(pos, ((pos2 == NPOS) ? tokens[1].size() : pos2) - pos);
        TAccInfoMap::const_iterator it = sc_AccInfoMap.find(tokens[2]);
        if (it == sc_AccInfoMap.end()) {
            string   key_used;
            TAccInfo old = Find(fmt, tokens[1], &key_used);
            if (old) {
                if ( !key_used.empty() ) {
                    key_used = " (per " + key_used + ')';
                }
                ERR_POST_X(4, Warning << "SAccGuide::AddRule: " << count
                           << ": unrecognized accession type " << tokens[2]
                           << " for special case " << tokens[1]
                           << "; falling back to " << old << key_used);
            } else {
                ERR_POST_X(9, Warning << "SAccGuide::AddRule: " << count
                           << ": unrecognized accession type " << tokens[2]
                           << " for stray(!) special case " << tokens[1]);
            }
        } else {
            TAccInfo value = it->second;
            if (pos2 == NPOS) {
                // Single accession: a range whose first and last coincide.
                rules[fmt].specials[tokens[1]] = TPair(tokens[1], value);
            } else {
                // _VERIFY(NStr::SplitInTwo(tokens[1], "-", tmp1, tmp2));
                tmp1.assign(tokens[1], 0, pos2);
                tmp2.assign(tokens[1], pos2 + 1, NPOS);
                // Keyed by the last accession; value carries the first.
                rules[fmt].specials[tmp2] = TPair(tmp1, value);
            }
        }
    } else if (tokens.size() == 3 && NStr::EqualNocase(tokens[0], "gnl")) {
        string key(tokens[1]);
        NStr::ToUpper(key);
        TAccInfoMap::const_iterator it = sc_AccInfoMap.find(tokens[2]);
        if (it == sc_AccInfoMap.end()) {
            TPrefixes::const_iterator it2 = general.find(key);
            if (it2 == general.end()) {
                ERR_POST_X(3, "SAccGuide::AddRule: " << count
                           << ": unrecognized accession type " << tokens[2]
                           << " for " << key);
            } else {
                ERR_POST_X(8, Warning << "SAccGuide::AddRule: " << count
                           << ": ignoring refinement of " << key
                           << " from 0x" << hex << it2->second
                           << " to unrecognized accession type " << tokens[2]);
            }
        } else {
            general[key] = it->second;
        }
    } else {
        ERR_POST_X(5, Warning << "SAccGuide::AddRule: " << count
                      << ": ignoring invalid line: " << rule);
    }
}

/// Look up the accession info registered for an accession or prefix.
///
/// @param fmt         Format code (see s_Key()); its bits above the low 16
///                    give the length of the letter prefix to examine.
/// @param acc_or_pfx  Full accession, or just its prefix.
/// @param key_used    Optional.  May receive the wildcard mask or prefix
///                    that produced the result; erased when a "special"
///                    range matched.
/// @return  eAcc_unknown when nothing is registered for fmt or no prefix
///          or mask matches; otherwise the matching rule's info, refined
///          by the specials table when the rule carries fAcc_specials and
///          more than the bare prefix was supplied.
SAccGuide::TAccInfo SAccGuide::Find(TFormatCode fmt,
                                    const CTempString& acc_or_pfx,
                                    string* key_used)
{
    TMainMap::const_iterator rule_it = rules.find(fmt);
    if (rule_it == rules.end()) {
        return CSeq_id::eAcc_unknown;
    }

    const SSubMap& sub    = rule_it->second;
    CTempString    prefix (acc_or_pfx, 0, fmt >> 16);
    TAccInfo       info   = CSeq_id::eAcc_unknown;

    // Literal prefixes take precedence over wildcard masks.
    TPrefixes::const_iterator exact = sub.prefixes.find(prefix);
    if (exact == sub.prefixes.end()) {
        ITERATE (TPairs, wild, sub.wildcards) {
            if ( !NStr::MatchesMask(prefix, wild->first) ) {
                continue;
            }
            if (key_used  &&  acc_or_pfx != wild->first) {
                *key_used = wild->first;
            }
            info = wild->second;
            break;
        }
    } else {
        info = exact->second;
    }

    // Specials only apply when more than the prefix was supplied and the
    // matching rule asks for them.
    if ( !(acc_or_pfx != prefix  &&  info & CSeq_id::fAcc_specials) ) {
        return info;
    }

    // Specials are keyed by the last accession of each range, so the
    // first key not less than the accession is the only candidate.
    TSpecialMap::const_iterator special
        = sub.specials.lower_bound(acc_or_pfx);
    if (special == sub.specials.end()
        ||  acc_or_pfx < special->second.first) {
        // Not inside any special range: general rule minus the flag.
        if (key_used  &&  key_used->empty()) {
            *key_used = prefix;
        }
        return TAccInfo(info & ~CSeq_id::fAcc_specials);
    }

    if (key_used) {
        key_used->erase();
    }
    return special->second.second;
}


// Report whether FILE is older than the built-in rule set.
//
// The built-in set's timestamp is parsed out of its first line, which is
// expected to be a VCS keyword header of the form
//     # $Id: accguide.inc <revision> <date> <time> ...
// Returns false (i.e., "not known to be old") whenever that header is absent
// or does not have the expected space-separated layout; otherwise returns
// true iff FILE's timestamp precedes the built-in one.
static bool s_IsFileOld(const string& file)
{
    static const char vcs_id_start[] = "# $Id: accguide.inc ";
    if ( !NStr::StartsWith(kBuiltInGuide[0], vcs_id_start) ) {
        return false;
    }
    // sizeof counts the array's terminating NUL, so subtract one to land on
    // the first character after the prefix rather than one past it (which
    // would skip the header's own NUL if nothing followed the prefix).
    const char* rev_start = kBuiltInGuide[0] + sizeof(vcs_id_start) - 1;
    // Skip the revision field.
    const char* date_start = strchr(rev_start, ' ');
    if (date_start != NULL) {
        ++date_start;
    } else {
        return false;
    }
    // The date and time fields are separated by a single space.
    const char* time_start = strchr(date_start + 1, ' ');
    if (time_start != NULL) {
        ++time_start;
    } else {
        return false;
    }
    const char* time_end = strchr(time_start + 1, ' ');
    if (time_end == NULL) {
        return false;
    }
    // Parse "<date> <time>" as a single timestamp.
    string builtin_timestamp_str(date_start, time_end - date_start);
    CTime  builtin_timestamp(builtin_timestamp_str, "Y-M-D h:m:sZ");
    CTime  file_timestamp;
    CFile(file).GetTime(&file_timestamp);
    return file_timestamp < builtin_timestamp;
}


// Default constructor: try to load rules from an external accguide.txt
// (skipping it if it is older than the built-in rules), fall back on the
// built-in rules if nothing was loaded, and finally make sure the table of
// general-ID database tags is populated.
SAccGuide::SAccGuide(void)
    : count(0)
{
    bool stale_file = false;
    {
        const string path = g_FindDataFile("accguide.txt");
        if ( !path.empty() ) {
            stale_file = s_IsFileOld(path);
            if ( !stale_file ) {
                // Loading problems are reported but otherwise tolerated.
                try {
                    x_Load(path);
                } STD_CATCH_ALL_X(1, "SAccGuide::SAccGuide")
            }
        }
    }

    if (count == 0) {
        // Nothing came from disk; explain why and use the compiled-in set.
        if (stale_file) {
            ERR_POST_X(12, Info << "CSeq_id::IdentifyAccession: " // minor lie
                       "using built-in rules because accguide.txt is older.");
        } else {
            ERR_POST_X(6, Info << "CSeq_id::IdentifyAccession: "
                       "falling back on built-in rules.");
        }
        const size_t rule_count
            = sizeof(kBuiltInGuide) / sizeof(kBuiltInGuide[0]);
        for (size_t idx = 0;  idx < rule_count;  ++idx) {
            AddRule(kBuiltInGuide[idx]);
        }
    }

    x_InitGeneral();
}

// Ensure the table of general-ID database tags is non-empty, seeding it
// with a small hard-coded list of nucleotide databases when no rule has
// supplied any entries.
void SAccGuide::x_InitGeneral(void)
{
    if ( !general.empty() ) {
        return;
    }

    // Populate with a hard-coded list by default; there are only
    // a few tags to worry about, but listing them in accguide.txt
    // right away would yield warnings from old Toolkit versions.
    static const char* const kDefaultNucTags[] = {
        "SRA", "TI", "TR_ASSM_CH", "TRACE_ASSM", "TRACE_CHGR"
    };
    const size_t tag_count
        = sizeof(kDefaultNucTags) / sizeof(kDefaultNucTags[0]);
    for (size_t idx = 0;  idx < tag_count;  ++idx) {
        general[kDefaultNucTags[idx]] = CSeq_id::eAcc_general_nuc;
    }
}

// Load rules from the named file by opening a line reader on it and
// delegating to the ILineReader overload.
void SAccGuide::x_Load(const string& filename)
{
    CRef<ILineReader> in(ILineReader::New(filename));
    x_Load(*in);
}

// Feed every line from IN to AddRule until the reader reports EOF.
// NOTE(review): as a do-while loop, this advances the reader and calls
// AddRule at least once even if IN is already at EOF -- confirm that is
// harmless for empty input.
void SAccGuide::x_Load(ILineReader& in)
{
    do {
        AddRule(*++in);
    } while ( !in.AtEOF() );
}

// Factory handed to s_Guide: returns a heap-allocated CRef holding a
// default-constructed SAccGuide.
static CRef<SAccGuide>* s_CreateGuide(void)
{
    return new CRef<SAccGuide>(new SAccGuide);
}

// Shared accession guide consulted by CSeq_id::IdentifyAccession and
// replaceable via CSeq_id::LoadAccessionGuide.  Presumably created on first
// use and owned by CSafeStatic -- confirm against CSafeStatic's contract.
static CSafeStatic<CRef<SAccGuide> > s_Guide(s_CreateGuide, NULL);

// Classify a bare accession string.  Anything after the first '.' is treated
// as a version and must consist solely of (one or more) digits, or else the
// result is eAcc_unknown.  The part before the version is uppercased and
// handed to x_IdentifyAccession for the actual classification.
CSeq_id::EAccessionInfo CSeq_id::IdentifyAccession(const CTempString& acc,
                                                   TParseFlags flags)
{
    const SIZE_TYPE dot_pos     = acc.find('.');
    const bool      has_version = (dot_pos != NPOS);
    if (has_version
        &&  (dot_pos >= acc.size() - 1
             ||  acc.find_first_not_of(kDigits, dot_pos + 1) != NPOS)) {
        return eAcc_unknown; // non-numeric "version"
    }
    const SIZE_TYPE main_size = has_version ? dot_pos : acc.size();

    static const SIZE_TYPE kMainAccBufSize = 32;
    if (main_size > kMainAccBufSize) {
        // Unlikely to prove recognizable (far too long for any standard
        // format as of January 2016), but try anyway.
        string long_acc(acc, 0, main_size);
        NStr::ToUpper(long_acc);
        return x_IdentifyAccession(long_acc, flags, has_version);
    }

    // Common case: uppercase into a small stack buffer, avoiding any
    // heap allocation.
    char upper_buf[kMainAccBufSize];
    const unsigned char* src
        = reinterpret_cast<const unsigned char*>(acc.data());
    for (SIZE_TYPE pos = 0;  pos < main_size;  ++pos) {
        upper_buf[pos] = static_cast<char>(toupper(src[pos]));
    }
    return x_IdentifyAccession(CTempString(upper_buf, main_size), flags,
                               has_version);
}
     
// Classify MAIN_ACC, the portion of an accession preceding any version
// suffix; callers are expected to have uppercased it already.
//
// main_acc     accession text without its version
// flags        parse flags; fParse_RawText enables text accession
//              recognition and fParse_RawGI enables all-digit GIs
// has_version  whether the original string carried a ".<digits>" suffix
//
// Accessions whose digit run is followed by other characters are first
// matched against a handful of hard-coded shapes (PDB, Swiss-Prot, PRF, and
// digit runs carrying an embedded 'S' or 'P' flag character).  Everything
// else is looked up in the accession guide by the position of the first
// digit and the number of characters after it.
CSeq_id::EAccessionInfo
CSeq_id::x_IdentifyAccession(const CTempString& main_acc, TParseFlags flags,
                             bool has_version)
{
    SIZE_TYPE digit_pos = main_acc.find_first_of(kDigits),
        main_size = main_acc.size();
    // Set to 'S' or 'P' when such a letter is embedded in the digit run.
    char flag_char = '\0';
    if (digit_pos == NPOS) {
        return eAcc_unknown;
    } else {
        SIZE_TYPE non_dig_pos = main_acc.find_first_not_of(kDigits, digit_pos);
        const unsigned char* ucdata = (const unsigned char*)main_acc.data();
        // Only accessions with something after the first run of digits
        // need the special-case handling below.
        if (non_dig_pos != NPOS  &&  (flags & fParse_RawText) != 0) {
            if ( !has_version  &&  digit_pos == 0  &&  main_size >= 4
                &&  main_size <= 7  &&  isalnum(ucdata[1])
                &&  isalnum(ucdata[2])  &&  isalnum(ucdata[3])) {
                // Possible PDB (always unversioned); examine further
                // to avoid false positives.
                switch (main_size) {
                case 7:
                    if ((main_acc[5] != main_acc[6]
                         &&  (main_acc[5] != 'V' || main_acc[6] != 'B'))
                        ||  !isalpha(ucdata[5])) {
                        break;
                    } // else fall through
                case 6:
                    // Be extra strict when the potential molecule ID
                    // could simply be a year.  (NB: *insisting* on a
                    // non-digit would rule out 1914|A, gi 157829621.)
                    if ((non_dig_pos < 4  &&  ispunct(ucdata[4]))
                        ||  strchr("|-_", main_acc[4])) {
                        return eAcc_pdb;
                    }
                    break;
                case 5:
                    if ((flags & fParse_ValidLocal) == 0) {
                        break;
                    } // else fall through
                case 4:
                    return eAcc_pdb;
                }
            }
            // Six-character Swiss-Prot shape: letter, digit, three
            // alphanumerics, digit.
            if (digit_pos == 1  &&  main_size == 6
                &&  (main_acc[0] == 'O'  ||  main_acc[0] == 'P'
                     ||  main_acc[0] == 'Q' ||  isalpha(ucdata[2]))
                &&  isdigit(ucdata[1])  &&  isalnum(ucdata[2])
                &&  isalnum(ucdata[3])  &&  isalnum(ucdata[4])
                &&  isdigit(ucdata[5])) {
                return eAcc_swissprot;
            // Ten-character Swiss-Prot shape, never starting with O, P, or Q.
            } else if (digit_pos == 1  &&  main_size == 10
                       &&  main_acc[0] != 'O'  &&  main_acc[0] != 'P'
                       &&  main_acc[0] != 'Q'
                       &&  isalpha(ucdata[2])  &&  isalnum(ucdata[3])
                       &&  isalnum(ucdata[4])  &&  isdigit(ucdata[5])
                       &&  isalpha(ucdata[6])  &&  isalnum(ucdata[7])
                       &&  isalnum(ucdata[8])  &&  isdigit(ucdata[9])) {
                return eAcc_swissprot;                
            } else if ( !has_version  &&  digit_pos == 0
                       &&  (non_dig_pos == 6  ||  non_dig_pos == 7)
                       &&  (main_size == non_dig_pos + 1
                            ||  main_acc[non_dig_pos + 1] == ':'
                            ||  (isalpha(ucdata[non_dig_pos + 1])
                                 &&  (main_size == non_dig_pos + 2
                                      ||  main_acc[non_dig_pos + 2] == ':')))) {
                // A formal spec appears to be elusive, but all examples in ID
                // contain six or seven digits followed by one or two letters,
                // followed in some rare cases by a tag such as :PDB=...
                return eAcc_prf;
            // At least four leading non-digits, two digits, an 'S' or 'P',
            // and then nothing but digits; remember which letter it was.
            } else if (digit_pos >= 4  &&  non_dig_pos == digit_pos + 2
                       &&  main_size - non_dig_pos >= 6  &&  main_acc[3] != '_'
                       &&  (main_acc[non_dig_pos] == 'S'
                            ||  main_acc[non_dig_pos] == 'P')
                       &&  (main_acc.find_first_not_of
                            (kDigits, non_dig_pos + 1) == NPOS)) {
                flag_char = main_acc[non_dig_pos];
            } else {
                return eAcc_unknown;
            }
        }
    }

    if (digit_pos == 0) {
        if ((flags & fParse_RawGI) != 0  &&  !has_version
            &&  main_acc[0] != '0'
            &&  main_acc.find_first_not_of(kDigits) == NPOS) {
            return eAcc_gi; // just digits
        } else {
            return eAcc_unknown; // PDB already handled
        }
    } else if ((flags & fParse_RawText) == 0) {
        return eAcc_unknown;
    }

    // Consult the guide, keyed on the number of leading non-digit characters
    // and the number of characters following them (less any flag character).
    SIZE_TYPE flag_len = (flag_char == '\0') ? 0 : 1;
    SIZE_TYPE digit_count = main_size - digit_pos - flag_len;
    EAccessionInfo ai
        = (*s_Guide)->Find(SAccGuide::s_Key(digit_pos, digit_count), main_acc);
    if (flag_char == 'P') {
        // A 'P' flag marks a protein; adjust the guide's answer accordingly.
        switch (ai & eAcc_division_mask) {
        case eAcc_targeted:
        case eAcc_wgs:
        // case eAcc_wgs_intermed:
            ai = EAccessionInfo((ai & eAcc_type_mask) | eAcc_wgs | fAcc_prot);
            break;
        case eAcc_tsa:
            ai = EAccessionInfo((ai & eAcc_type_mask) | eAcc_tsa | fAcc_prot);
            break;
        default:
            ERR_POST_X(11,
                       Warning << main_acc
                       << ": Protein flag found with unexpected division "
                       << ((ai & eAcc_division_mask) >> 8));
            ai = EAccessionInfo((ai & (eAcc_type_mask | eAcc_division_mask))
                                | fAcc_prot);
            break;
        }
    }
    switch (ai & eAcc_division_mask) {
    case eAcc_targeted:
    case eAcc_tsa:
    case eAcc_wgs:
    case eAcc_wgs_intermed:
        // Flag as a master record when everything from two characters past
        // the first digit onward is '0'.
        if (digit_pos >= 4
            &&  (main_acc.find_first_not_of
                 ("0", digit_pos /* + flag_len */ + 2) == NPOS)) {
            return EAccessionInfo(ai | fAcc_master);
        }
        // otherwise fall through
    default:
        break;
    }
    return ai;
}


// Classify this ID.  The ID's own choice is authoritative for the type;
// accession analysis is used only to contribute additional flags and
// division details for the text-based types, and the accession guide's
// table of database tags refines general IDs.
CSeq_id::EAccessionInfo CSeq_id::IdentifyAccession(TParseFlags flags) const
{
    const E_Choice type = Which();

    switch (type) {
    case e_Pir:
    case e_Swissprot:
    case e_Prf: // but *NOT* e_Pdb
        // always just protein
        return (EAccessionInfo)(type | fAcc_prot);

    case e_Genbank: case e_Embl:  case e_Ddbj:
    case e_Tpg:     case e_Tpe:   case e_Tpd:
    case e_Other:   case e_Gpipe: case e_Named_annot_track:
    {
        const CTextseq_id* tsid = GetTextseq_Id();
        if ( !tsid->IsSetAccession() ) {
            return static_cast<EAccessionInfo>(type);
        }
        // Can't necessarily go straight to x_IdentifyAccession, as
        // the accession may contain lowercase letters.
        const EAccessionInfo guess
            = IdentifyAccession(tsid->GetAccession(), flags);
        if ((guess & eAcc_type_mask) == e_not_set) {
            // We *know* what the type should be....
            return (EAccessionInfo)((guess & eAcc_flag_mask) | type);
        }
        if ((guess & eAcc_type_mask) != type) {
            // misidentified or mislabeled; assume the former
            return static_cast<EAccessionInfo>(type);
        }
        return guess;
    }

    case e_General:
    {
        string db_name = GetGeneral().GetDb();
        NStr::ToUpper(db_name);
        SAccGuide::TPrefixes::const_iterator hit
            = (*s_Guide)->general.find(db_name);
        if (hit == (*s_Guide)->general.end()) {
            return eAcc_general;
        }
        return hit->second;
    }

    default:
        return static_cast<EAccessionInfo>(type);
    }
}


// Replace the shared accession guide (s_Guide) with a new SAccGuide
// constructed from FILENAME.
void CSeq_id::LoadAccessionGuide(const string& filename)
{
    s_Guide->Reset(new SAccGuide(filename));
}

// Replace the shared accession guide (s_Guide) with a new SAccGuide
// constructed from the line reader IN.
void CSeq_id::LoadAccessionGuide(ILineReader& in)
{
    s_Guide->Reset(new SAccGuide(in));
}


static inline
void x_GetLabel_Type(const CSeq_id& id, string* label,
                     CSeq_id::TLabelFlags flags)
{
    unsigned choice = id.Which();
    _ASSERT(choice < CSeq_id::e_MaxChoice);
    if (choice >= CSeq_id::e_MaxChoice) {
        return;
    }

    switch (choice) {
    case CSeq_id::e_General:
        // we may encode 'gnl' or the database name as requested
        if (flags & CSeq_id::fLabel_GeneralDbIsContent) {
            *label += id.GetGeneral().GetDb();
        } else {
            *label += "gnl";
        }
        break;

    case CSeq_id::e_Patent:
        *label += id.GetPatent().GetCit().GetId().IsNumber() ? "pat" : "pgp";
        break;

    default:
        *label += s_TextId[choice];
        break;
    }

    // no extra flag interpretation currently
}


static inline
void x_GetLabel_Content(const CSeq_id& id, string* label,
                        CSeq_id::TLabelFlags flags, int* version)
{
    const CTextseq_id* tsid = id.GetTextseq_Id();

    if (version != NULL) {
        *version = 0;
    }

    //text id
    if (tsid) {
        string str;
        if (tsid->IsSetAccession()) {
            str = tsid->GetAccession();
            NStr::ToUpper(str);
        } else if (tsid->IsSetName()) {
            str = tsid->GetName();
        }

        if (version != NULL && tsid->IsSetVersion()) {
            *version = tsid->GetVersion();
        }
        if ( !str.empty() ) {
            if ( (flags & CSeq_id::fLabel_Version)  &&  tsid->IsSetVersion()) {
                str += "." + NStr::IntToString(tsid->GetVersion());
            }
        }
        *label += str;

    } else { //non-text id
        switch (id.Which()) {
        case CSeq_id::e_not_set:
            break;

        case CSeq_id::e_Local:
            {{
                const CObject_id& oid = id.GetLocal();
                if (oid.IsId()) {
                    *label += NStr::IntToString(oid.GetId());
                } else if (oid.IsStr()) {
                    *label += oid.GetStr();
                }
            }}
            break;

        case CSeq_id::e_Gibbsq:
            *label += NStr::IntToString(id.GetGibbsq());
            break;

        case CSeq_id::e_Gibbmt:
            *label += NStr::IntToString(id.GetGibbmt());
            break;

        case CSeq_id::e_Giim:
            *label += NStr::IntToString(id.GetGiim().GetId());
            break;

        case CSeq_id::e_General:
            {{
                const CDbtag& dbt = id.GetGeneral();
                if ((flags & CSeq_id::fLabel_GeneralDbIsContent) == 0) {
                    *label += dbt.GetDb() + ':';
                }
                if (dbt.GetTag().IsId()) {
                    *label += NStr::IntToString(dbt.GetTag().GetId());
                } else if (dbt.GetTag().IsStr()) {
                    *label += dbt.GetTag().GetStr();
                }
            }}
            break;

        case CSeq_id::e_Patent:
            {{
                const CId_pat& idp = id.GetPatent().GetCit();
                *label += idp.GetCountry();
                // *label += '|';
                *label += (idp.GetId().IsNumber() ?
                           idp.GetId().GetNumber() :
                           idp.GetId().GetApp_number());
                *label += '_'; // |
                *label += NStr::IntToString(id.GetPatent().GetSeqid());
            }}
            break;

        case CSeq_id::e_Gi:
            *label += NStr::NumericToString(id.GetGi());
            break;

        case CSeq_id::e_Pdb:
            {{
                const CPDB_seq_id& pid = id.GetPdb();
                *label += pid.GetMol().Get();
                unsigned char chain = static_cast<unsigned char>(pid.GetChain());
                if (chain > ' ') {
                    *label += '_';
                    if (islower(chain)) {
                        *label += string(SIZE_TYPE(2),
                                         static_cast<char>(toupper(chain)));
                    } else {
                        *label += static_cast<char>(chain);
                    }
                }
            }}
            break;

        default:
            break;
        }
    }
}


// Produce a label for this ID.  eFasta overwrites *LABEL with the FASTA
// representation; the other label types append to *LABEL: the type tag
// (eType), the content (eContent), or both separated by '|' (eBoth).
// A null LABEL makes the call a no-op.
void CSeq_id::GetLabel(string* label, ELabelType type, TLabelFlags flags) const
{
    if (label == NULL) {
        return;
    }

    if (type == eFasta) {
        *label = AsFastaString();
        return;
    }

    const bool want_type    = (type == eBoth  ||  type == eType);
    const bool want_content = (type == eBoth  ||  type == eContent);

    if (want_type) {
        x_GetLabel_Type(*this, label, flags);
    }
    if (want_type  &&  want_content) {
        *label += "|";
    }
    if (want_content) {
        x_GetLabel_Content(*this, label, flags, NULL);
    }
}


// Variant of GetLabel that uses no label flags and, for label types that
// include content (eBoth, eContent), passes VERSION along to be filled in
// alongside the content.  eFasta overwrites *LABEL; the other types append.
// A null LABEL makes the call a no-op.
void CSeq_id::GetLabel(string* label, int* version, ELabelType type) const
{
    if (label == NULL) {
        return;
    }

    if (type == eFasta) {
        *label = AsFastaString();
        return;
    }

    const bool want_type    = (type == eBoth  ||  type == eType);
    const bool want_content = (type == eBoth  ||  type == eContent);

    if (want_type) {
        x_GetLabel_Type(*this, label, 0);
    }
    if (want_type  &&  want_content) {
        *label += "|";
    }
    if (want_content) {
        x_GetLabel_Content(*this, label, 0, version);
    }
}



/* Return this ID's content label, including the version of a text seqid
   only when WITH_VERSION is true. */
string CSeq_id::GetSeqIdString(bool with_version) const
{
    const TLabelFlags flags = with_version ? fLabel_Version : 0;
    string content;
    GetLabel(&content, eContent, flags);
    return content;
}

// Return this ID's content label (no label flags), passing VERSION through
// to GetLabel so that it can report the version separately.
string CSeq_id::GetSeqIdString(int* version) const
{
    string label;
    GetLabel(&label, version, eContent);
    return label;
}


// Write this ID to OUT in FASTA style: a type tag, a vertical bar, and then
// the type-specific content.
//
// The tag normally comes from s_TextId, with two exceptions: patents whose
// ID is not a number use "pgp", and Swiss-Prot IDs whose release is
// "unreviewed" use "tr".  Choices at or beyond e_MaxChoice are treated as
// e_not_set.
void CSeq_id::WriteAsFasta(ostream& out)
    const
{
    unsigned the_type = Which();
    if (the_type >= e_MaxChoice)  // New SeqId type
        the_type = e_not_set;

    // Emit the tag.
    if (IsPatent()  &&  !GetPatent().GetCit().GetId().IsNumber() ) {
        out << "pgp|";
    } else if (IsSwissprot()  &&  GetSwissprot().IsSetRelease()
               &&  GetSwissprot().GetRelease() == "unreviewed") {
        out << "tr|";
    } else {
        out << s_TextId[the_type] << '|';
    }

    // Emit the content; most types delegate to their payload's own
    // formatting routine.
    switch (the_type) {
    case e_not_set:
        break;
    case e_Local:
        GetLocal().AsString(out);
        break;
    case e_Gibbsq:
        out << GetGibbsq();
        break;
    case e_Gibbmt:
        out << GetGibbmt();
        break;
    case e_Giim:
        out << (GetGiim().GetId());
        break;
    case e_Genbank:
        GetGenbank().AsFastaString(out);
        break;
    case e_Embl:
        GetEmbl().AsFastaString(out);
        break;
    case e_Pir:
        GetPir().AsFastaString(out);
        break;
    case e_Swissprot:
        GetSwissprot().AsFastaString(out);
        break;
    case e_Patent:
        GetPatent().AsFastaString(out);
        break;
    case e_Other:
        GetOther().AsFastaString(out);
        break;
    case e_General:
        {
            // database name, a vertical bar, and then the tag
            const CDbtag& dbt = GetGeneral();
            out << (dbt.GetDb()) << '|';  // no Upcase per Ostell - Karl 7/2001
            dbt.GetTag().AsString(out);
        }
        break;
    case e_Gi:
        out << GetGi();
        break;
    case e_Ddbj:
        GetDdbj().AsFastaString(out);
        break;
    case e_Prf:
        GetPrf().AsFastaString(out);
        break;
    case e_Pdb:
        GetPdb().AsFastaString(out);
        break;
    case e_Tpg:
        GetTpg().AsFastaString(out);
        break;
    case e_Tpe:
        GetTpe().AsFastaString(out);
        break;
    case e_Tpd:
        GetTpd().AsFastaString(out);
        break;
    case e_Gpipe:
        // don't suppress version after all
        GetGpipe().AsFastaString(out /*, false */);
        break;
    case e_Named_annot_track:
        GetNamed_annot_track().AsFastaString(out);
        break;
    default:
        out << "[UnknownSeqIdType]";
        break;
    }
}


// Return the text that WriteAsFasta produces for this ID, captured via an
// in-memory stream.
const string CSeq_id::AsFastaString(void) const
{
    CNcbiOstrstream str;
    WriteAsFasta(str);
    return CNcbiOstrstreamToString(str);
}


//
// GetStringDescr()
// Describe a bioseq by its IDs in the requested format:
//   eFormat_FastA              - output of the static WriteAsFasta
//   eFormat_ForceGI            - FASTA form of the first GI found (gi|####)
//   eFormat_BestWithVersion    - label of the best-ranked ID, with version
//   eFormat_BestWithoutVersion - label of the best-ranked ID, no version
// Returns an empty string when the requested description is unavailable.
//
string CSeq_id::GetStringDescr(const CBioseq& bioseq, EStringFormat fmt)
{
    if (fmt == eFormat_FastA) {
        CNcbiOstrstream fasta;
        WriteAsFasta(fasta, bioseq);
        return CNcbiOstrstreamToString(fasta);
    }

    // Rank IDs differently for proteins than for everything else.
    const bool is_protein
        = (bioseq.GetInst().GetMol() == CSeq_inst::eMol_aa);
    CRef<CSeq_id> best_id = FindBestChoice(bioseq.GetId(),
                                           is_protein ? CSeq_id::FastaAARank
                                           : CSeq_id::FastaNARank);

    if (fmt == eFormat_ForceGI) {
        ITERATE (CBioseq::TId, id_it, bioseq.GetId()) {
            if ( !(*id_it)->IsGi() ) {
                continue;
            }
            CNcbiOstrstream gi_text;
            (*id_it)->WriteAsFasta(gi_text);
            return CNcbiOstrstreamToString(gi_text);
        }
    } else if (fmt == eFormat_BestWithVersion
               ||  fmt == eFormat_BestWithoutVersion) {
        if (best_id.NotEmpty()) {
            TLabelFlags label_flags = 0;
            if (fmt == eFormat_BestWithVersion) {
                label_flags = fLabel_Version;
            }
            string label;
            best_id->GetLabel(&label, eDefault, label_flags);
            return label;
        }
    }

    // catch-all for unusual events
    return string();
}

// Write a FASTA-style ID string for BIOSEQ to OSTR: the first GI among its
// IDs (if any), followed by the best-ranked ID when that is not itself a
// GI, with a vertical bar between the two when both are present.
// Returns OSTR.
CNcbiOstream& CSeq_id::WriteAsFasta(CNcbiOstream& ostr, const CBioseq& bioseq)
{
    // Rank IDs differently for proteins than for everything else.
    const bool is_protein
        = (bioseq.GetInst().GetMol() == CSeq_inst::eMol_aa);
    CRef<CSeq_id> best = FindBestChoice(bioseq.GetId(),
                                        is_protein ? CSeq_id::FastaAARank
                                        : CSeq_id::FastaNARank);

    // FastA format
    // Here we have something like:
    //      gi|###|SOME_ACCESSION|title
    bool wrote_gi = false;
    ITERATE (CBioseq::TId, id_it, bioseq.GetId()) {
        if ( !(*id_it)->IsGi() ) {
            continue;
        }
        (*id_it)->WriteAsFasta(ostr);
        wrote_gi = true;
        break;
    }

    if (best.Empty()  ||  best->IsGi()) {
        return ostr;
    }

    if (wrote_gi) {
        ostr << '|';
    }
    best->WriteAsFasta(ostr);
    return ostr;
}


// Construct an ID from a dbtag; see Set(const CDbtag&, bool), which does
// all of the work and may throw CSeqIdException.
CSeq_id::CSeq_id(const CDbtag& dbtag, bool set_as_general)
{
    Set(dbtag, set_as_general);
}

// Set this ID from a dbtag.
//
// Tags whose database type is EMBL, DDBJ, or GI become IDs of the
// corresponding choice.  Any other database becomes a general ID holding a
// copy of DBTAG if SET_AS_GENERAL is true.
//
// Throws CSeqIdException (eFormat) if the tag is neither a string nor an
// integer, or if the database is unrecognized and SET_AS_GENERAL is false.
// Returns *this.
CSeq_id& CSeq_id::Set(const CDbtag& dbtag, bool set_as_general)
{
    // NOTE(review): version and acc are filled in below but not otherwise
    // read in this function; the s_SplitVersion call may matter only for
    // whatever checking it performs -- confirm before removing.
    int         version = -1;
    CTempString acc;
    string      accver;

    // Obtain the tag's text form.
    switch (dbtag.GetTag().Which()) {
    case CObject_id::e_Str:
        accver = dbtag.GetTag().GetStr();
        s_SplitVersion(accver, acc, version);
        break;
    case CObject_id::e_Id:
        acc = accver = NStr::IntToString(dbtag.GetTag().GetId());
        break;
    default:
        NCBI_THROW(CSeqIdException, eFormat,
                   "Bad CDbtag tag type "
                   + CObject_id::SelectionName(dbtag.GetTag().Which()));
        break;
    }

    // Dispatch on the kind of database named by the tag.
    CDbtag::EDbtagType type = dbtag.GetType();
    switch (type) {
    case CDbtag::eDbtagType_EMBL:
        SetEmbl().Set(accver);
        break;

    case CDbtag::eDbtagType_DDBJ:
        SetDdbj().Set(accver);
        break;

    case CDbtag::eDbtagType_GI:
        // String tags go through the generic string-based setter; integer
        // tags are converted directly.
        if (dbtag.GetTag().IsStr()) {
            Set(e_Gi, dbtag.GetTag().GetStr());
        } else {
            SetGi(GI_FROM(CObject_id::TId, dbtag.GetTag().GetId()));
        }
        break;

    case CDbtag::eDbtagType_bad:
    default:
        // not understood as a sequence id
        if (set_as_general) {
            SetGeneral().Assign(dbtag);
        } else {
            NCBI_THROW(CSeqIdException, eFormat,
                       "Unrecognized Dbtag DB " + dbtag.GetDb());
        }
        break;
    }

    return *this;
}


// SeqIdFastAConstructors
// Construct an ID by parsing THE_ID; see Set(const CTempString&,
// TParseFlags), which does all of the work and may throw CSeqIdException.
CSeq_id::CSeq_id(const CTempString& the_id, TParseFlags flags)
{
    Set(the_id, flags);
}

// Set this ID by parsing THE_ID_IN, ignoring surrounding whitespace.
//
// Unless fParse_NoFASTA is given, the string is first checked for a
// FASTA-style type tag; if one is found, the string is split on vertical
// bars and handed to x_Init.  Otherwise it is treated as a bare accession
// (when any fParse_AnyRaw flag is set) and, failing that, as a local ID
// (when fParse_ValidLocal permits).
//
// Throws CSeqIdException (eFormat) for an empty bare accession, for a bare
// string that can be interpreted neither as an accession nor as a local ID,
// and for a tagged ID with non-empty leftover pieces unless
// fParse_PartialOK is set.  Returns *this.
CSeq_id& CSeq_id::Set(const CTempString& the_id_in, TParseFlags flags)
{
    CTempString the_id = NStr::TruncateSpaces_Unsafe(the_id_in,
                                                     NStr::eTrunc_Both);
    E_Choice    type   = e_not_set;

    if ((flags & fParse_NoFASTA) == 0) {
        type = s_CheckForFastaTag(the_id);
    }
    if (type == e_not_set) {
        if (the_id.empty()) {
            NCBI_THROW(CSeqIdException, eFormat,
                       "Empty bare accession supplied");
        }
        // If no (attempt at a) valid tag, tries to interpret the string
        // as a pure accession.
        if ((flags & fParse_AnyRaw) != 0) {
            type = GetAccType(IdentifyAccession(the_id, flags));
        }
        switch (type) {
        case e_Gi:
            return Set(type, the_id);
        case e_not_set:
            // Unrecognized as an accession; accept as a local ID if the
            // flags allow, and otherwise fail.
            if ((flags & fParse_ValidLocal) != 0
                &&  ((flags & fParse_AnyLocal) == fParse_AnyLocal
                     ||  IsValidLocalID(the_id))) {
                // Reject internal vertical bars when otherwise permissive?
                // NOTE(review): type is necessarily e_not_set in this case,
                // so the type == e_Gi test below can never be true.
                if (type == e_Gi  ||  (flags & fParse_AnyRaw) == 0) {
                    return Set(e_Local, the_id);
                } else {
                    SetLocal().SetStr(the_id);
                    return *this;
                }
            } else {
                NCBI_THROW(CSeqIdException, eFormat,
                           "Malformatted ID " + string(the_id));
            }
        case e_Prf:
            // technically a name/locus, not an accession!
            return Set(type, kEmptyStr, the_id);
        case e_Pdb:
        {
            // The first four characters are the molecule; a fifth
            // character serves as a separator when more follow it and
            // otherwise as the chain itself unless it is a vertical bar.
            string mol(the_id, 0, 4), chain;
            // NStr::SplitInTwo(the_id, "|", mol, chain);
            if (the_id.size() > 5) {
                chain = the_id.substr(5);
            } else if (the_id.size() == 5  &&  the_id[4] != '|') {
                chain = the_id[4];
            }
            return Set(type, mol, chain);
        }
        default:
        {
            // Any other recognized accession: separate out the version.
            CTempString acc;
            int         ver;
            s_SplitVersion(the_id, acc, ver);
            return Set(type, acc, kEmptyStr, ver);
        }
        }
    } else {
        list<CTempString> fasta_pieces;
        NStr::Split(the_id, "|", fasta_pieces, NStr::fSplit_NoMergeDelims);
        x_Init(fasta_pieces, type);
        if ( !fasta_pieces.empty() ) {
            // tolerate trailing parts if they're all empty.
            // NOTE(review): with fParse_PartialOK, the warning below is
            // posted once per non-empty leftover piece (no break) --
            // confirm that the repetition is intended.
            ITERATE(list<CTempString>, it, fasta_pieces) {
                if ( !it->empty() ) {
                    if ((flags & fParse_PartialOK) != 0) {
                        ERR_POST_X(10, Warning << "Ignoring extra parts"
                                   " (synonyms?) in FASTA-style ID "
                                   << the_id);
                    } else {
                        NCBI_THROW(CSeqIdException, eFormat,
                                   "FASTA-style ID " + string(the_id)
                                   + " has too many parts.");
                    }
                }
            }
        }
        return *this;
    }
}

bool CSeq_id::IsValidLocalID(const CTempString& s)
{
    static const char* kIllegal = " |=\"";
    CSeq_id_find_pred pred; pred.kSymbols = kIllegal;
    return (!s.empty() && find_if(s.begin(), s.end(), pred) == s.end());
}

// Convenience wrapper around ParseIDs that always passes fParse_RawText and
// fParse_AnyLocal, adding fParse_PartialOK when ALLOW_PARTIAL_FAILURE is
// true.  Returns ParseIDs' result.
SIZE_TYPE CSeq_id::ParseFastaIds(CBioseq::TId& ids, const CTempString& s,
                                 bool allow_partial_failure)
{
    const TParseFlags base_flags = fParse_RawText | fParse_AnyLocal;
    if ( !allow_partial_failure ) {
        return ParseIDs(ids, s, base_flags);
    }
    return ParseIDs(ids, s, base_flags | fParse_PartialOK);
}

// Parse S, which may hold several FASTA-style IDs joined by vertical bars,
// appending each resulting ID to IDS.
//
// ids    container receiving the parsed IDs
// s      text to parse; surrounding whitespace is ignored
// flags  parse flags; fParse_PartialOK turns failures on individual IDs
//        into warnings rather than exceptions
//
// Text lacking a FASTA tag, or consisting of a single piece, is parsed as
// one ID with fParse_NoFASTA added to FLAGS.  A leading piece that is not a
// recognized tag is treated as the database of a general ID.
//
// Returns the number of IDs appended.  Without fParse_PartialOK, any
// exception raised while parsing an ID propagates to the caller.
SIZE_TYPE CSeq_id::ParseIDs(CBioseq::TId& ids, const CTempString& s,
                            TParseFlags flags)
{
    CTempString ss = NStr::TruncateSpaces_Unsafe(s, NStr::eTrunc_Both);
    if (ss.empty()) {
        return 0;
    }

    // first simple check to make it faster
    if (!s_HasFastaTag(ss)) {
        CRef<CSeq_id> id(new CSeq_id(ss, flags | fParse_NoFASTA));
        ids.push_back(id);
        return 1;
    }

    SIZE_TYPE count = 0;
    list<CTempString> fasta_pieces;
    NStr::Split(ss, "|", fasta_pieces, NStr::eNoMergeDelims);
    _ASSERT(fasta_pieces.size() > 0);
    if (fasta_pieces.size() == 1)
    {
        CRef<CSeq_id> id(new CSeq_id(ss, flags | fParse_NoFASTA));
        ids.push_back(id);
        count = 1;
    }
    else
    {
        E_Choice type = WhichInverseSeqId(fasta_pieces.front());       
        if (type == e_not_set)
        {
            // unknown database are reported as 'general'
            fasta_pieces.push_front("gnl");
            type = e_General;
        }
        // Each x_Init call consumes the pieces making up one ID and
        // returns the type it detected for the next one.
        while ( !fasta_pieces.empty() ) {
            try {
                CRef<CSeq_id> id(new CSeq_id);
                type = id->x_Init(fasta_pieces, type);
                ids.push_back(id);
                ++count;
            } catch (std::exception& e) {
                if ((flags & fParse_PartialOK) != 0) {
                    ERR_POST_X(7, Warning << e.what());
                    // The failed call already removed the tag that TYPE
                    // described, so TYPE says nothing about the piece now
                    // at the front; have the next call determine the type
                    // from that piece itself.
                    type = e_not_set;
                } else {
                    throw;
                }
            }
        }
    }
    return count;
}


// Initialize this Seq-id from the leading pieces of a '|'-split FASTA-style
// ID string, consuming the pieces it uses from the front of fasta_pieces.
//
// fasta_pieces  Remaining pieces; must be non-empty.  The first piece is the
//               type tag, followed by that type's data fields.
// type          Type the caller already determined for the leading tag, or
//               e_not_set to have it looked up here via WhichInverseSeqId.
//
// Returns the type of the *next* ID in fasta_pieces if it was identified
// while scanning for omitted optional fields, otherwise e_not_set.
//
// Throws CSeqIdException (eFormat) for an unsupported type tag, too few
// fields, or a bad patent sequence number; whatever Set() throws also
// propagates.
CSeq_id::E_Choice CSeq_id::x_Init(list<CTempString>& fasta_pieces,
                                  E_Choice type)
{
    _ASSERT(!fasta_pieces.empty());
    // Peel off the type tag.
    CTempString typestr = fasta_pieces.front();
    fasta_pieces.pop_front();
    if (type == e_not_set) {
        // Type not known yet: trim the tag and look it up.
        NStr::TruncateSpacesInPlace(typestr, NStr::eTrunc_Both);
        type = WhichInverseSeqId(typestr);
    } else {
        // Caller-supplied type must agree with the tag ("pgp" and "tr" are
        // accepted as alternate tags for patent and Swiss-Prot IDs).
        _ASSERT(NStr::EqualNocase(typestr, s_TextId[type])
                ||  (type == e_Patent && NStr::EqualNocase(typestr, "pgp"))
                ||  (type == e_Swissprot && NStr::EqualNocase(typestr, "tr")));
    }
    // Reject unknown tags and tags longer than three characters.
    if (type == e_not_set  ||  typestr.size() > 3) {
        NCBI_THROW(CSeqIdException, eFormat,
                   "Unsupported ID type " + string(typestr));
    }

    // Up to three data fields; unused trailing entries stay empty.
    vector<CTempString> fields(3);
    SIZE_TYPE   min_fields, max_fields;
    E_Choice    next_type = e_not_set;
    // Determine how many data fields this ID type requires / allows.
    switch (type) {
    case e_Local:
    case e_Gibbsq:
    case e_Gibbmt:
    case e_Giim:
    case e_Gi:
        min_fields = max_fields = 1;
        break;
    case e_Patent:
        min_fields = max_fields = 3;
        break;
    case e_General:
        min_fields = max_fields = 2;
        break;
#if 0 // release no longer used
    case e_Other:
        min_fields = 1;
        max_fields = 3;
        break;
#endif
    default: // text seqid: accession and optional name
        min_fields = 1;
        max_fields = 2;
        break;
    }

    // Consume up to max_fields pieces, stopping early once the required
    // fields are in hand and the input runs out or the next ID appears
    // to begin.
    for (SIZE_TYPE i = 0;  i < max_fields;  ++i) {
        if (fasta_pieces.empty()) {
            if (i >= min_fields) {
                break;
            } else {
                NCBI_THROW(CSeqIdException, eFormat,
                           "Not enough fields for ID of type "
                           + string(typestr));
            }
        } else {
            // Only optional fields are examined here: a 2- or 3-character
            // piece that is itself a recognized type tag, and is not the
            // last piece, may be the start of the next ID.
            if (i >= min_fields  &&  fasta_pieces.size() > 1
                &&  (fasta_pieces.front().size() == 2
                     ||  fasta_pieces.front().size() == 3)
                &&  ((next_type = WhichInverseSeqId(fasta_pieces.front()))
                     != e_not_set)) {
                // Likely mid-string optional-field omission;
                // conservatively treat as such only if unable to
                // parse the following piece as an ID type, though.
                list<CTempString>::iterator it = fasta_pieces.begin();
                ++it;
                _ASSERT(it != fasta_pieces.end());
                E_Choice next_type_2;
                if ((it->size() == 2  ||  it->size() == 3)
                    &&  (next_type_2 = WhichInverseSeqId(*it)) != e_not_set) {
                    // The following piece also looks like a tag, so the
                    // current piece is taken as this ID's field and the
                    // following piece as the next ID's tag.
                    next_type = next_type_2;
                } else {
                    // Current piece starts the next ID; leave it in place
                    // (next_type already holds its type).
                    break;
                }
            }
            fields[i] = fasta_pieces.front();
            fasta_pieces.pop_front();
        }
    }

    // Special case -- dbSNP IDs have historically contained internal
    // vertical bars, so we have to parse them greedily.
    // fields[1] ends up referring to snp_name's storage, hence the
    // lifetime requirement below.
    string snp_name; // must survive until the end of the function
    if (type == e_General  &&  NStr::EqualNocase(fields[0], "dbSNP")
        &&  !fasta_pieces.empty() ) {
        snp_name = string(fields[1]) + '|' + NStr::Join(fasta_pieces, "|");
        fields[1] = snp_name;
        fasta_pieces.clear();
    }

    // Clear out extra empty pieces
    while ( !fasta_pieces.empty()  &&  fasta_pieces.front().empty() ) {
        fasta_pieces.pop_front();
    }

    // Type-specific fix-ups before handing the fields to Set().
    int ver = 0;
    switch (type) {
    case e_Swissprot:
        // The tag ("tr" vs. "sp") is passed on via the release field.
        if (NStr::EqualNocase(typestr, "tr")) {
            fields[2] = "unreviewed";
        } else {
            _ASSERT(NStr::EqualNocase(typestr, "sp"));
            fields[2] = "reviewed";
        }
        break;

    case e_Patent:
        // "version" actually sequence number within patent, but whatever...
        ver = NStr::StringToNonNegativeInt(fields[2]);
        if (ver <= 0) {
            NCBI_THROW(CSeqIdException, eFormat,
                       "Bad sequence number " + string(fields[2]) + " for "
                       + string(fields[0]) + " patent " + string(fields[1]));
        }
        // to distinguish applications from granted patents; the numeric
        // content has already made its way into ver.
        fields[2] = typestr;
        break;

    case e_Pdb:
        // Molecule and chain arrived fused in one field: the first four
        // characters become the molecule name and the remainder the chain.
        if (fields[0].size() > 4  &&  fields[1].empty()) { // misdelimited
            if (fields[0].size() > 5) {
                // Skips the character at index 4 -- presumably a stray
                // delimiter; TODO confirm.
                fields[1] = fields[0].substr(5);
            } else {
                _ASSERT(fields[0][4] != '|');
                fields[1] = fields[0].substr(4);
            }
            fields[0] = fields[0].substr(0, 4);
        }
        break;

    default:
        break; // avoid compiler warnings
    }

    Set(type, fields[0] /* acc */, fields[1] /* name */, ver,
        fields[2] /* rel */);

    return next_type;
}


// Build a Seq-id of an integer-valued type; all validation and
// assignment is delegated to Set(E_Choice, TIntId).
CSeq_id::CSeq_id(E_Choice the_type, TIntId the_id)
{
    this->Set(the_type, the_id);
}

// Make this Seq-id an integer-valued ID of the given type.
//
// the_type  One of e_Local, e_Gibbsq, e_Gibbmt, e_Giim, or e_Gi.
// the_id    Numeric ID; must be strictly positive.
//
// Returns *this.
//
// Throws CSeqIdException (eFormat) if the_id is not positive or the_type
// is not one of the numeric types listed above.
CSeq_id& CSeq_id::Set(E_Choice the_type, TIntId the_id)
{
    if (the_id <= 0) {
        NCBI_THROW(CSeqIdException, eFormat,
                   "Non-positive numeric ID " + NStr::NumericToString(the_id));
    }

    switch (the_type) {
    case e_Local:
        SetLocal().SetId(INT_ID_TO(CObject_id::TId, the_id));
        break;
    case e_Gibbsq:
        SetGibbsq(INT_ID_TO(CSeq_id::TGibbsq, the_id));
        break;
    case e_Gibbmt:
        SetGibbmt(INT_ID_TO(CSeq_id::TGibbmt, the_id));
        break;
    case e_Giim:
    {
        // Only the numeric part is known here; make sure the optional
        // db / release members are left unset.
        CGiimport_id& giim = SetGiim();
        giim.SetId(INT_ID_TO(CGiimport_id::TId, the_id));
        giim.ResetDb();
        giim.ResetRelease();
        break;
    }
    case e_Gi:
        SetGi(the_id);
        break;
    default:
        // Separate the fixed text from the type name with a space, as the
        // other diagnostics in this file do.
        NCBI_THROW(CSeqIdException, eFormat,
                   "Invalid numeric ID type " + SelectionName(the_type));
    }
    return *this;
}


// Build a Seq-id from textual components; all interpretation of the
// arguments is delegated to the matching Set() overload.
CSeq_id::CSeq_id(E_Choice           the_type,
                 const CTempString& acc_in,
                 const CTempString& name_in,
                 int                version,
                 const CTempString& release_in)
{
    this->Set(the_type, acc_in, name_in, version, release_in);
}

// Karl Sirotkin 7/2001

// Make this Seq-id an ID of the given type from textual components.
//
// the_type    Desired variant.  e_not_set leaves the object untouched.
// acc_in      Primary field: accession for text IDs, the number (as text)
//             for numeric IDs, country for patents, database for general
//             IDs, molecule name for PDB IDs.  Surrounding whitespace is
//             ignored.
// name_in     Secondary field: name for text IDs, patent/application
//             number for patents, tag for general IDs, chain for PDB IDs.
// version     Version for text IDs; sequence number for patents.
// release_in  Release for text IDs; for patents, "pgp" (case-insensitive)
//             selects an application number rather than a patent number.
//
// Returns *this.
//
// Throws CSeqIdException (eFormat) for unparsable numeric IDs, unexpected
// PDB chain strings, and unsupported types.
CSeq_id& CSeq_id::Set(E_Choice           the_type,
                      const CTempString& acc_in,
                      const CTempString& name_in,
                      int                version,
                      const CTempString& release_in)
{
    CTempString  acc       = NStr::TruncateSpaces_Unsafe(acc_in,
                                                         NStr::eTrunc_Both);

    int          the_id;
    CTextseq_id* tsid      = 0;    // set only for CTextseq_id-based types
    bool         allow_dot = true; // forwarded to CTextseq_id::Set below

    switch (the_type) {
    case e_not_set: // Will cause unspecified SeqId to be returned.
        break;

    case e_Local:
        {
            // Store as an integer only when the text starts with a
            // nonzero digit and parses to a positive int; anything else
            // (including numbers with leading zeros) is kept as a string.
            if ( !acc.empty()  &&  acc[0] >= '1'  &&  acc[0] <= '9'
                &&  (the_id = NStr::StringToNonNegativeInt(acc)) > 0) {
                SetLocal().SetId(the_id);
            } else { // to cover case where embedded vertical bar in
                // string, could add code here, to concat a
                // '|' and name string, if not null/empty
                SetLocal().SetStr(acc);
            }
            break;
        }

        // numeric IDs
    case e_Gibbsq:
    case e_Gibbmt:
    case e_Giim:
    case e_Gi:
#ifdef NCBI_INT8_GI
        // With wide GIs enabled, first try parsing the GI as a TIntId;
        // on a parse error or a non-positive value, fall through to the
        // int-based path below.
        if ( the_type == e_Gi ) {
            try {
                TIntId gi = NStr::StringToNumeric<TIntId>(acc);
                if ( gi > 0 ) {
                    SetGi(gi);
                    return *this;
                }
            }
            catch ( CException& /*ignored*/ ) {
                // will be processed by the code below
            }
        }
#endif
        // A negative result is reported here; a zero result is passed on
        // to the numeric Set() overload, which rejects non-positive IDs.
        if ( (the_id = NStr::StringToNonNegativeInt (acc)) >= 0 ) {
            return Set(the_type, the_id);
        } else {
            NCBI_THROW(CSeqIdException, eFormat,
                       "Negative, excessively large, or non-numeric "
                       + SelectionName(the_type)
                       + " ID " + string(acc));
        }
        break;

        // text IDs
        // (Each case only selects the variant; the actual field assignment
        // happens once, after the switch.)
    case e_Genbank:    tsid = &SetGenbank();    break;
    case e_Embl:       tsid = &SetEmbl();       break;
    case e_Pir:        tsid = &SetPir();        allow_dot = false;  break;
    case e_Swissprot:  tsid = &SetSwissprot();  break;
    case e_Other:      tsid = &SetOther();      break;
    case e_Ddbj:       tsid = &SetDdbj();       break;
    case e_Prf:        tsid = &SetPrf();        allow_dot = false;  break;
    case e_Tpg:        tsid = &SetTpg();        break;
    case e_Tpe:        tsid = &SetTpe();        break;
    case e_Tpd:        tsid = &SetTpd();        break;
    case e_Gpipe:      tsid = &SetGpipe();      break;
    case e_Named_annot_track:  tsid = &SetNamed_annot_track();  break;

    case e_Patent:
        {
            // acc = country, name = (application) number,
            // version = sequence number within the patent.
            CTempString      name      =
                NStr::TruncateSpaces_Unsafe(name_in, NStr::eTrunc_Both);
            CTempString      release   =
                NStr::TruncateSpaces_Unsafe(release_in, NStr::eTrunc_Both);
            CPatent_seq_id&  pat       = SetPatent();
            CId_pat&         id_pat    = pat.SetCit();
            CId_pat::C_Id&   id_pat_id = id_pat.SetId();
            id_pat.SetCountry(acc);

            if (NStr::EqualNocase(release, "pgp")) {
                id_pat_id.SetApp_number(name);
            } else {
                id_pat_id.SetNumber(name);
            }
            id_pat.ResetDoc_type();
            pat.SetSeqid(version);
            break;
        }

    case e_General:
        {
            // acc = database name, name = tag.  The tag is stored as an
            // integer under the same rule as local IDs above.
            CTempString name = NStr::TruncateSpaces_Unsafe(name_in,
                                                           NStr::eTrunc_Both);
            CDbtag&     dbt  = SetGeneral();
            dbt.SetDb(acc);
            CObject_id& oid = dbt.SetTag();
            if ( !name.empty()  &&  name[0] >= '1'  &&  name[0] <= '9'
                 &&  (the_id = NStr::StringToNonNegativeInt(name)) > 0) {
                oid.SetId(the_id);
            } else {
                oid.SetStr(name);
            }
            break;
        }

    case e_Pdb:
        {
            // acc = molecule name, name = chain specification.
            CTempString  name = NStr::TruncateSpaces_Unsafe(name_in,
                                                            NStr::eTrunc_Both);
            CPDB_seq_id& pdb  = SetPdb();
            pdb.SetMol().Set(acc);

            // Consult name_in in addition to name as whitespace
            // stripping can lose relevant information here.
            if (name_in.empty()) {
                // No chain supplied at all.
                pdb.ResetChain();
            } else if (name.empty()) {
                // Whitespace-only chain: stored as a blank.
                pdb.SetChain(' ');
            } else if (name.size() == 1) { // force upper case?
                pdb.SetChain(static_cast<unsigned char>(name[0]));
            } else if (NStr::EqualNocase(name, "VB")) {
                // "VB" stands in for a vertical-bar chain ID.
                pdb.SetChain('|');
            } else if (name.size() == 2  &&  name[0] == name[1]) {
                // A doubled character denotes its lowercase form.
                pdb.SetChain(tolower(static_cast<unsigned char>(name[0])));
            } else {
                NCBI_THROW(CSeqIdException, eFormat,
                           "Unexpected PDB chain id " + string(name) + " for "
                           + string(acc));
            }
            pdb.ResetRel();
            break;
        }

    default:
        NCBI_THROW(CSeqIdException, eFormat,
                   "Unsupported Seq-id type " + SelectionName(the_type));
    }

    if (tsid) {
        // CTextseq_id::Set will take care of truncating any spaces.
        tsid->Set(acc, name_in, version, release_in, allow_dot);
    }

    return *this;
}


// Base score of this ID, keyed solely on the variant currently selected.
int CSeq_id::BaseTextScore(void) const
{
    int score;
    switch (Which()) {
    case e_not_set:
        score = 83;
        break;
    case e_Giim:
    case e_Gi:
        score = 20;
        break;
    case e_General:
    case e_Gibbsq:
    case e_Gibbmt:
        score = 15;
        break;
    case e_Local:
    case e_Patent:
        score = 10;
        break;
    case e_Gpipe:
    case e_Named_annot_track:
        score = 9;
        break;
    case e_Other:
        score = 8;
        break;
    default: // all remaining variants
        score = 5;
        break;
    }
    return score;
}


// Base score of this ID, keyed solely on the variant currently selected.
int CSeq_id::BaseBestRankScore(void) const
{
    int score;
    switch (Which()) {
    case e_not_set:
        score = 83;
        break;
    case e_General:
    case e_Local:
        score = 80;
        break;
    case e_Gibbsq:
    case e_Gibbmt:
    case e_Giim:
        score = 70;
        break;
    case e_Named_annot_track:
        score = 69;
        break;
    case e_Gpipe:
        score = 68;
        break;
    case e_Patent:
        score = 67;
        break;
    case e_Other:
        score = 65;
        break;
    case e_Gi:
        score = 51;
        break;
    default: // all remaining variants
        score = 60;
        break;
    }
    return score;
}


int CSeq_id::BaseFastaNAScore(void) const
{
    switch (Which()) {
        // these few are bogus, at least for nucleotide sequences
    case e_not_set: case e_Giim:
    case e_Pir: case e_Swissprot: case e_Prf:  return 255;
    case e_Local:                              return 230;
    case e_Gi:                                 return 120;
    case e_General:
        {
        const string& db = GetGeneral().GetDb();
        if (db.compare("TMSMART") == 0 ||
            db.compare("BankIt") == 0 ||
            db.compare("NCBIFILE") == 0 )
            return 240;
        else
            return 50;
        }
    case e_Patent:                             return 40;
    case e_Gibbsq: case e_Gibbmt: case e_Pdb:  return 30;
    case e_Other:                              return 15;
    default: /* [third party] GB/EMBL/DDBJ */  return 20;
    }
}


int CSeq_id::BaseFastaAAScore(void) const
{
    switch (Which()) {
    case e_not_set: case e_Giim:   return 255;
    case e_Local:                  return 230;
    case e_Gi:                     return 120;
    case e_General:
        {
        const string& db = GetGeneral().GetDb();
        if (db.compare("TMSMART") == 0 ||
            db.compare("BankIt") == 0 ||
            db.compare("NCBIFILE") == 0)
            return 240;
        else
            return 90;
        }
    case e_Patent:                 return 80;
    case e_Prf:                    return 70;
    case e_Pdb:                    return 50;
    case e_Gibbsq: case e_Gibbmt:  return 40;
    case e_Pir:                    return 30;
    case e_Swissprot:              return 20;
    case e_Other:                  return 15;
    default:                       return 60; // [third party] GB/EMBL/DDBJ
    }
}


int CSeq_id::AdjustScore(int base_score) const
{
    int score = base_score * 10;
    if ( IsGeneral() ) {
        const string& db = GetGeneral().GetDb();
        if ( db == "TRACE" ) {
            // prefer "ti" over "TRACE"
            score += 5;
        }
    }
    else if ( const CTextseq_id* text_id = GetTextseq_Id() ) {
        if ( !text_id->IsSetVersion() ) {
            score += 4;
        }
        if ( !text_id->IsSetAccession() ) {
            score += 3;
        }
        if ( !text_id->IsSetName() ) {
            score += 2;
        }
    }
    return score;
}

// Validity check for a collection of IDs.
// As written, this ignores both arguments and always returns false.
// NOTE(review): looks like an unimplemented placeholder -- confirm the
// intended contract against the header before relying on the result.
bool CSeq_id::IsValid(const CBioseq::TId& ids, TParseFlags flags)
{
    return false;
}

// Validity check for a single ID.
// As written, this ignores both arguments and always returns false.
// NOTE(review): looks like an unimplemented placeholder -- confirm the
// intended contract against the header before relying on the result.
bool CSeq_id::IsValid(const CSeq_id& id, TParseFlags flags)
{
    return false;
}


// Select the requested CTextseq_id-based variant in 'match' and return a
// pointer to its (writable) text ID.  For any other choice, 'match' is
// left alone and a null pointer is returned.
CTextseq_id* s_GetTextseq_id(const CSeq_id::E_Choice& choice, CSeq_id& match)
{
    CTextseq_id* text_id = 0;
    switch ( choice ) {
    case CSeq_id::e_Genbank:    text_id = &match.SetGenbank();    break;
    case CSeq_id::e_Embl:       text_id = &match.SetEmbl();       break;
    case CSeq_id::e_Pir:        text_id = &match.SetPir();        break;
    case CSeq_id::e_Swissprot:  text_id = &match.SetSwissprot();  break;
    case CSeq_id::e_Other:      text_id = &match.SetOther();      break;
    case CSeq_id::e_Ddbj:       text_id = &match.SetDdbj();       break;
    case CSeq_id::e_Prf:        text_id = &match.SetPrf();        break;
    case CSeq_id::e_Tpg:        text_id = &match.SetTpg();        break;
    case CSeq_id::e_Tpe:        text_id = &match.SetTpe();        break;
    case CSeq_id::e_Tpd:        text_id = &match.SetTpd();        break;
    case CSeq_id::e_Gpipe:      text_id = &match.SetGpipe();      break;
    case CSeq_id::e_Named_annot_track:
        text_id = &match.SetNamed_annot_track();
        break;
    default:
        break;
    }
    return text_id;
}


// For a text-based Seq-id, insert into 'matches' handles for less fully
// specified IDs of the same type, built from subsets of this ID's
// accession (A), version (v), name (N), and release (r).  Does nothing if
// this ID has no CTextseq_id or carries too few fields to have variants.
//
// A single scratch ID ('match' / 'ti') is mutated step by step, so the
// order of the Set / Reset calls below is significant.
void CSeq_id::GetMatchingTextseqIds(TSeqIdHandles& matches) const
{
    const CTextseq_id* orig = GetTextseq_Id();
    if ( !orig ) return;

    // Snapshot which fields are set, along with their values.
    bool A = orig->IsSetAccession();
    CTextseq_id::TAccession av = A ? orig->GetAccession() : kEmptyStr;
    bool v = orig->IsSetVersion();
    CTextseq_id::TVersion vv = v ? orig->GetVersion() : 0;
    bool N = orig->IsSetName();
    CTextseq_id::TName nv = N ? orig->GetName() : kEmptyStr;
    bool r = orig->IsSetRelease();
    CTextseq_id::TRelease rv = r ? orig->GetRelease() : kEmptyStr;

    // Scratch ID of the same variant as *this.
    // NOTE(review): dereferences the helper's result unchecked; assumes it
    // is non-null whenever GetTextseq_Id() is -- confirm.
    CSeq_id match;
    CTextseq_id& ti = *s_GetTextseq_id(Which(), match);

    if (A  &&  (v  ||  N  ||  r)) {
        // Accession only
        ti.SetAccession(av);
        matches.insert(CSeq_id_Handle::GetHandle(match));
        if (v  &&  (N  ||  r)) {
            // A.v
            ti.SetVersion(vv);
            matches.insert(CSeq_id_Handle::GetHandle(match));
        }
        if ( N ) {
            // Name only
            ti.Reset();
            ti.SetName(nv);
            matches.insert(CSeq_id_Handle::GetHandle(match));
            if (v  ||  r) {
                if ( r ) {
                    // N.r
                    ti.SetRelease(rv);
                    matches.insert(CSeq_id_Handle::GetHandle(match));
                    // Drop the release again before adding the accession.
                    ti.ResetRelease();
                }
                // A + N
                ti.SetAccession(av);
                matches.insert(CSeq_id_Handle::GetHandle(match));
                if (v  &&  r) {
                    // A.v + N
                    ti.SetVersion(vv);
                    matches.insert(CSeq_id_Handle::GetHandle(match));
                    // A + N.r
                    ti.ResetVersion();
                    ti.SetRelease(rv);
                    matches.insert(CSeq_id_Handle::GetHandle(match));
                }
            }
        }
    }
    else if (N  &&  (v  ||  r)) {
        // No accession: the only variants are based on the name.
        // N only
        ti.Reset();
        ti.SetName(nv);
        matches.insert(CSeq_id_Handle::GetHandle(match));
        if (v  &&  r) {
            // N.r
            ti.SetRelease(rv);
            matches.insert(CSeq_id_Handle::GetHandle(match));
        }
    }
}


// Insert into 'matches' handles for alternate forms of this ID:
// text-based IDs defer to GetMatchingTextseqIds, PDB IDs contribute a copy
// without 'rel' (when it is set), and every other variant contributes
// nothing.
void CSeq_id::GetMatchingIds(TSeqIdHandles& matches) const
{
    switch ( Which() ) {
    // CPDB_seq_id: 'rel' is optional, so also offer the ID without it.
    case e_Pdb:
    {
        if ( !GetPdb().IsSetRel() ) {
            return;
        }
        CSeq_id without_rel;
        without_rel.Assign(*this);
        without_rel.SetPdb().ResetRel();
        matches.insert(CSeq_id_Handle::GetHandle(without_rel));
        return;
    }

    // CTextseq_id-based variants
    case e_Genbank:
    case e_Embl:
    case e_Pir:
    case e_Swissprot:
    case e_Other:
    case e_Ddbj:
    case e_Prf:
    case e_Tpg:
    case e_Tpe:
    case e_Tpd:
    case e_Gpipe:
    case e_Named_annot_track:
        GetMatchingTextseqIds(matches);
        return;

    // Remaining variants have no alternate forms.
    case e_not_set:
    case e_Local:     // CObject_id
    case e_Gibbsq:    // int
    case e_Gibbmt:    // int
    case e_Giim:      // CGiimport_id
    case e_Patent:    // CPatent_seq_id
    case e_General:   // CDbtag
    case e_Gi:        // TGi
        return;
    }
}


// Parse an accession or accession range of the form
//     PREFIX DIGITS [ '-' [PREFIX] DIGITS ]
// where PREFIX consists of letters (plus underscores when
// fAllowUnderscores is set in flags).
//
// On success, prefix / start / stop / digits describe the range; a lone
// accession yields stop == start.  acc_info starts out as eAcc_unknown.
//
// Throws CSeqIdException (eFormat) if the hyphen is missing, the second
// prefix differs from the first, the two numeric parts differ in length,
// or the second numeric part contains anything but digits.
SSeqIdRange::SSeqIdRange(const CTempString& s, TFlags flags)
    : start(0), stop(0), digits(0), acc_info(CSeq_id::eAcc_unknown)
{
    size_t pos = 0, n = s.size();

    // Leading prefix.
    while (pos < n
           &&  (isalpha((unsigned char) s[pos])
                ||  (((flags & fAllowUnderscores) != 0)  &&  s[pos] == '_'))) {
        prefix += s[pos++];
    }
    // First number; remember its width for validation and formatting.
    while (pos < n  &&  isdigit((unsigned char) s[pos])) {
        start = start * 10 + s[pos++] - '0';
        ++digits;
    }
    if (pos == n) {
        // Single accession rather than a range.
        stop = start;
        return;
    } else if (s[pos++] != '-') {
        NCBI_THROW(CSeqIdException, eFormat,
                   "Expected hyphen in range " + string(s));
    }

    {{
        // Optional repeated prefix, which must match the first if present.
        string pfx2;
        while (pos < n
               && (isalpha((unsigned char) s[pos])
                   || (((flags & fAllowUnderscores) != 0) && s[pos] == '_'))) {
            pfx2 += s[pos++];
        }
        if ( !pfx2.empty()  &&  pfx2 != prefix) {
            NCBI_THROW(CSeqIdException, eFormat,
                       "Mismatched prefixes in range " + string(s));
        }
    }}
    // The remainder must be exactly as long as the first number...
    if (pos + digits != n) {
        NCBI_THROW(CSeqIdException, eFormat,
                   "Mismatched digit counts in range " + string(s));
    }
    while (pos < n  &&  isdigit((unsigned char) s[pos])) {
        stop = stop * 10 + s[pos++] - '0';
    }
    // ...and consist solely of digits; otherwise stop would silently be
    // computed from a truncated number.
    if (pos != n) {
        NCBI_THROW(CSeqIdException, eFormat,
                   "Non-digit characters in range " + string(s));
    }
}


// Build a Seq-id for the accession this iterator currently refers to.
//
// The accession classification is computed on first use and cached in
// the range's acc_info.  If it maps to a specific Seq-id type, the ID is
// built with that type; otherwise the accession string is left to
// CSeq_id's single-argument constructor to interpret.
CRef<CSeq_id> SSeqIdRange::const_iterator::GetID(void) const
{
    CRef<CSeq_id> ret;

    if (m_Range->acc_info == CSeq_id::eAcc_unknown) {
        m_Range->acc_info = CSeq_id::IdentifyAccession(**this);
        if (m_Range->size() > 1  &&  m_Range->digits == 5
            &&  !m_Range->prefix.empty()) {
            // account for possible non-uniformity
            switch (m_Range->prefix[0]) {
            case 'C': case 'D': case 'c': case 'd':
                if (m_Range->prefix.size() == 3) {
                    m_Range->acc_info = CSeq_id::eAcc_unreserved_prot;
                }
                // Must not fall through: one-letter C/D prefixes are not
                // subject to the 'N' rule below.
                break;
            case 'N': case 'n':
                if (m_Range->prefix.size() == 1) {
                    m_Range->acc_info = CSeq_id::eAcc_unreserved_nuc;
                }
                break;
            default:
                break;
            }
        }
    }

    CSeq_id::E_Choice type = CSeq_id::GetAccType(m_Range->acc_info);
    if (type == CSeq_id::e_not_set) {
        ret = new CSeq_id(**this);
    } else {
        ret = new CSeq_id(type, **this);
    }

    return ret;
}


// Rebuild the cached accession string for the current position: the
// range's prefix followed by m_Number, zero-padded on the left to at
// least the range's digit count.  Returns the cached string.
const string& SSeqIdRange::const_iterator::x_SetAccession(void) const
{
    CNcbiOstrstream formatted;
    formatted << m_Range->prefix;
    formatted << setfill('0') << setw(m_Range->digits) << m_Number;
    m_Accession = CNcbiOstrstreamToString(formatted);
    return m_Accession;
}


END_objects_SCOPE // namespace ncbi::objects::
END_NCBI_SCOPE

#undef NCBI_USE_ERRCODE_X
