pygor3 package

Submodules

pygor3.AIRR module

J gene with allele. If referring to a known reference sequence in a database the relevant gene/allele nomenclature should be followed (e.g., IGHJ4*02 if using IMGT/GENE-DB).

class pygor3.AIRR.AIRR_VDJ_rearrangement(sequence_id=None, sequence=None, scenario_rank=None, scenario_proba_cond_seq=None, pgen=None)

Bases: object

static list_of_fields()
to_dict()
class pygor3.AIRR.AIRR_gene(gene_type)

Bases: object

to_dict()

pygor3.IgorBestScenarios module

Created on Mon Oct 7 10:18:45 2019

@author: alfaceor

class pygor3.IgorBestScenarios.IgorBestScenariosVDJ

Bases: object

getDJ_Region()
getDJ_fasta()
getDJ_ins()
getD_3_dels()
getD_5_dels()
getD_Region()
getD_fasta()
getD_gene_name()
getD_ntsequence()
getJ_5_dels()
getJ_Region()
getJ_fasta()
getJ_gene_name()
getJ_ntsequence()
getVD_Region()
getVD_fasta()
getVD_ins()
getV_3_dels()
getV_Region()
getV_fasta()
getV_gene_name()
getV_ntsequence()
get_DictNicknameProbs()
get_ErrorProb()
get_EventProb()
classmethod load_FromDict(dictBestScenarios)

Return an IgorBestScenariosVDJ instance from a dictionary. :param dictBestScenarios: dictionary with the best scenarios record fields. :return: IgorBestScenariosVDJ instance

classmethod load_FromEventNameValues(mdl, seq_index, strSeq_index, scenario_dict)

Return an IgorBestScenariosVDJ instance from a dict of names or values. :param scenario_dict: dict of event names or values of the scenario. :return: IgorBestScenariosVDJ instance

classmethod load_FromLineBestScenario(line, delimiter=';')
classmethod load_FromSQLRecord(sqlRecordBestScenarios)

Return an IgorBestScenariosVDJ instance from an IgorSqlRecord. :param sqlRecordBestScenarios: record of a sql database table. :return: IgorBestScenariosVDJ instance

save_scenario_fasta(outfilename)
setModel_Parms(flnModelParms)
str_scenario_fasta()
to_dict()
to_dict_export()
to_dict_names()
to_dict_ntsequences()
to_dict_values()

pygor3.IgorDefaults module

pygor3.IgorDefaults.Igor_dict_order_seq_side = {'Five_prime': 0, 'Three_prime': 1, 'Undefined_side': 2}

enum Event_type {GeneChoice_t , Deletion_t , Insertion_t , Dinuclmarkov_t,Undefined_t}; enum Event_safety{VD_safe = 0 , DJ_safe = 1 , VJ_safe = 2 }; enum Seq_side{ Five_prime =0 , Three_prime = 1 , Undefined_side = 2 }; enum Seq_type {V_gene_seq = 0 , VD_ins_seq = 1 , D_gene_seq = 2 , DJ_ins_seq = 3 , J_gene_seq = 4 , VJ_ins_seq = 5}; enum Gene_class{V_gene=0 , VD_genes=1 , D_gene=2 , DJ_genes=3 , J_gene=4 , VJ_genes=5 , VDJ_genes=6 , Undefined_gene=7 };

pygor3.IgorDictionaries module

pygor3.IgorDictionaries.update_igor_batch_dict(igor_wd, igor_batchname)

pygor3.IgorIO module

class pygor3.IgorIO.IgorAlignment_data

Bases: object

classmethod load_FromCSVLine(csvline, strGene_name='', delimiter=';')
classmethod load_FromSQLRecord(sqlRecordAlign, strGene_name='')

Return a IgorAlignment_data instance from a IgorSqlRecord. :param sqlRecordAlign: record of a sql database table. :param strGene_name: gene_name associated to the record. :return: IgorAlignment_data instance

to_dict()
class pygor3.IgorIO.IgorAnchors(path_ref_genome: Union[None, str, pathlib.Path] = None, flnVanchors: Union[None, str, pathlib.Path] = None, flnJanchors: Union[None, str, pathlib.Path] = None, df_Vanchors: Union[None, pandas.core.frame.DataFrame] = None, df_Janchors: Union[None, pandas.core.frame.DataFrame] = None, sep=';')

Bases: object

load_J_dataframe(flnJanchors: Union[None, str, pathlib.Path] = None, sep=';')
load_V_dataframe(flnVanchors: Union[None, str, pathlib.Path] = None, sep=';')
load_dataframes()
classmethod load_from_path(path_ref_genome)
update_default_filenames(path_ref_genome: Union[None, str, pathlib.Path] = None)
class pygor3.IgorIO.IgorBestScenariosVDJ

Bases: object

getDJ_Region()
getDJ_fasta()
getDJ_ins()
getD_3_dels()
getD_5_dels()
getD_Region()
getD_fasta()
getD_gene_name()
getD_ntsequence()
getJ_5_dels()
getJ_Region()
getJ_fasta()
getJ_gene_name()
getJ_ntsequence()
getVD_Region()
getVD_fasta()
getVD_ins()
getV_3_dels()
getV_Region()
getV_fasta()
getV_gene_name()
getV_ntsequence()
get_DictNicknameProbs()
get_ErrorProb()
get_EventProb()
classmethod load_FromDict(dictBestScenarios)

Return an IgorBestScenariosVDJ instance from a dictionary. :param dictBestScenarios: dictionary with the best scenarios record fields. :return: IgorBestScenariosVDJ instance

classmethod load_FromEventNameValues(mdl, seq_index, strSeq_index, scenario_dict)

Return an IgorBestScenariosVDJ instance from a dict of names or values. :param scenario_dict: dict of event names or values of the scenario. :return: IgorBestScenariosVDJ instance

classmethod load_FromLineBestScenario(line, delimiter=';')
classmethod load_FromSQLRecord(sqlRecordBestScenarios)

Return an IgorBestScenariosVDJ instance from an IgorSqlRecord. :param sqlRecordBestScenarios: record of a sql database table. :return: IgorBestScenariosVDJ instance

save_scenario_fasta(outfilename)
setModel_Parms(flnModelParms)
str_scenario_fasta()
to_dict()
to_dict_names()
to_dict_ntsequences()
class pygor3.IgorIO.IgorEvent_realization

Bases: object

A small class storing for each RecEvent realization its name, value and corresponding index.

classmethod from_dict(event_dict: dict)
classmethod from_pandas(df: pandas.core.frame.DataFrame)
classmethod from_tuple(id, value, name='')
id
name
to_dict()
value
class pygor3.IgorIO.IgorGeneTemplate

Bases: object

get_sequence(gene_name)
class pygor3.IgorIO.IgorIndexedSequence(seq_index=- 1, sequence='')

Bases: object

Return a IgorIndexedSequence instance

classmethod load(seq_index, sequence)
classmethod load_FromCSVline(csvline, delimiter=';')

Return a IgorIndexedSequence instance from a line of IGoR indexed_sequences.csv file. :param csvline: String line of a csv file. :param delimiter: Character to delimitate csv file. :return: IgorIndexedSequence object

classmethod load_FromSQLRecord(sqlRecord)

Return an IgorIndexedSequence instance from a database record, according to the database specification. :param sqlRecord: sqlite record of one entry. :return: IgorIndexedSequence object.

to_dict()

Return a IgorIndexedSequence instance as a python dictionary.

class pygor3.IgorIO.IgorModel(model_parms_file: Union[None, str, pathlib.Path] = None, model_marginals_file: Union[None, str, pathlib.Path] = None, parms: Union[None, pygor3.IgorIO.IgorModel_Parms] = None, marginals: Union[None, pygor3.IgorIO.IgorModel_Marginals] = None, fln_V_gene_CDR3_anchors: Union[None, str, pathlib.Path] = None, fln_J_gene_CDR3_anchors: Union[None, str, pathlib.Path] = None)

Bases: object

Class

IgorModel,

Parameters
  • model_parms_file (Union[None, str, Path]=None) – Path of IGoR’s model parms file

  • model_marginals_file (Union[None, str, Path]=None) – Path of IGoR’s model marginals file

  • parms (Union[None, IgorModel_Parms] = None) – IgorModel_Parms instance

  • marginals (Union[None, IgorModel_Marginals] = None) – IgorModel_Marginals instance

property ErrorRate_dict
J_anchor(id: int)
property J_anchors
property Pconditionals
VE_get_Pmarginal_of_event(strEvent)

Variable elimination to get the marginal probability of event strEvent :param strEvent: event nickname

VE_get_Pmarginals_initial_factors()
VE_get_factors_by_sum_out_variable(var_to_eliminate, factors)
V_anchor(id: int)
property V_anchors
add_Edge(parent_nickname: str, child_nickname: str)

Add an Edge on Bayes network from parent to child, a new dimension is added to the conditional probabilities. :param parent_nickname: Nickname of parent event. :param child_nickname: Nickname of child event.

construct_sequence_VDJ_from_realization_dict(scen_realization_dict)

Return the VDJ gene segments, i.e. the genes with their deletions or palindromic insertions applied

construct_sequence_VJ_from_realization_dict(scen_realization_dict)

Return the VJ sequence, i.e. the genes with their deletions or palindromic insertions applied

property event_Deletion_nickname_list
property event_DinucMarkov_nickname_list
property event_GeneChoice_D_nickname
property event_GeneChoice_J_nickname
property event_GeneChoice_V_nickname
property event_GeneChoice_nickname_list
property event_Insertion_nickname_list
export_Pmarginal_to_csv(event_nickname: str, *args, **kwargs)
export_csv(fln_prefix, sep=';')

Export model events in different csv files for event. :param fln_prefix: filename prefix to save events files :param sep: csv field separator

export_event_to_csv(strEvent, *args, **kargs)
export_model(model_parms_file=None, model_marginals_file=None)
export_plot_Pconditionals(outfilename_prefix)

Create a pdf file with preliminary plots of conditional probabilities :param outfilename_prefix: Prefix for pdf file

export_plot_Pmarginals(outfilename_prefix)
export_plot_events(outfilename_prefix)
generate_Pmarginals()
generate_sequence_construction_list()

Generate the list of events to reconstruct a sequence from a scenario (stored in self.sequence_construction_event_list)

generate_xdata()

Load model dictionary with xarray structures from parms and marginals instances

get_AIRR_VDJ_rearragement_dict_from_scenario(scenario, str_sequence, v_offset=0, pgen=None, junction=None, junction_aa=None)
get_AIRR_VJ_rearragement_dict_from_scenario(scenario, str_sequence, v_offset=0, pgen=None, junction=None, junction_aa=None)

Return AIRR rearrangement from scenario.

get_AIRR_from_ps_scenario(ps_scenario, v_offset=0)

Return airr format description from IGoR scenario

get_CDR3_nt_pos_from_ps_scenario(ps_scenario, V_offset=None)

Return tuple of anchors positions V_anchor_in_seq, J_anchor_in_seq :param ps_scenario: Pandas Series of one scenario. :param V_offset: Offset from IGoR’s V alignments.

get_Event_Marginal(event_nickname: str)

Returns an xarray with the marginal probability of the event given the nickname

get_Event_value(event_nickname, index)
get_IgorEvent_realization(ps_scenario, event_nickname: Union[None, str, list] = None)
get_IgorEvent_realization_for_nickname(ps_scenario, event_nickname: str)
get_P_from_scenarios_cols(df_scenarios, colname_list)

Return xarray with the marginalized probabilities of the listed columns in the scenarios dataframe df_scenarios :param df_scenarios: Scenarios with normalized probability. Loaded with self.get_dataframe_scenarios() :param colname_list: List of variables to preserve in the marginalization

get_P_joint(not_sum_out_nickname_list: list)

Return xarray DataArray of the joint probability of nickname event list :param not_sum_out_nickname_list: list of nickname events to get the probability joint distribution. DinucMarkov events not accepted.

get_P_marginal_from_df_scenarios_cols(df_scenarios, colname_list)

Get marginalized probabilities of df_scenarios

get_VDJ_CDR3_from_df_scenario(df_scenario)
get_VDJ_CDR3_from_scenario(ps_scenario)

Return the numbers of amino acids in vd insertions

get_conditional_entropy_dinucl_function_l_ins(event_nickname_dinucl: str)

Return a function that depends on the insertion length $l$: $H(P_{m_i|l}) = H(p_{ss}) - (l-1) \sum_m p_{ss}(m) \sum_n T(n|m) \log_2 T(n|m)$, where $p_{ss}$ is the stationary state calculated for $T$ (eigenvector for eigenvalue 1) and $T(n|m)$ is the jump matrix from $m$ to $n$, where $m$ and $n$ are nucleotides.

static get_cross_entropy(self)
get_dataframe_from_fln_generated_realizations_werr(igor_fln_generated_realizations_werr, sep=';')
get_dataframe_scenarios(fln_scenarios)

Return dataframe scenarios from fln_scenarios file. :param fln_scenarios: filename of IGoR scenarios file.

get_df_Deletion_entropy_contribution()

Return pandas dataframe entropy decomposition for Deletions

get_df_GeneChoice_entropy_contribution()

Return pandas dataframe entropy decomposition for GeneChoice

get_df_Insertion_entropy_contribution()

H(P({m_i})) = H(P_{ins}) - sum_l P_{ins}(l) sum_{m_i |l} H( P(m_i |l) ) Return entropy contributions of insertions considering the diversity of nucleotides.

get_df_entropy_decomposition()

Return entropy decomposition of events in a Dataframe

get_df_events_rearrangement()

Return event dataframe sorted for V(D)J rearrangement

static get_df_normalize_prob(df_scenarios)
get_df_realizations(df_scenarios, event_nickname: Union[None, str] = None)

Return a dataframe column with the id, value and name column of the realization

get_df_realizations_dinucl(df_scenarios, event_nickname)

Return a new dataframe of the Dinucl Markov event with columns id, value and name

get_df_scenario_aln_from_scenario(ps_scenario)

Return a Dataframe with the information needed to show an alignment from a scenario. :param ps_scenario: Pandas Series (row from df_scenarios)

get_entropy_event(event_nickname)

Returns entropy of event. If the event has parents, it returns the conditional entropy of the event given its parents, $H(X|Y) = -\sum_{x,y} p(x,y) \log p(x|y)$. :param event_nickname: Event nickname to calculate entropy

get_event_realization_of_event(event_nickname, event_id)
get_event_realizations_DataFrame(event_nickname)
get_events_nicknames_list()

Return list of event nicknames in current model

get_events_types_list()

Return list of event types in current model

get_gene_segment_dict(strGene: str, ps_scenario: pandas.core.series.Series)

Return the gene cut, or expanded with palindromic insertions, for a scenario :param strGene: ‘V’, ‘D’, ‘J’, ‘VD’, ‘DJ’, or ‘VJ’ :param ps_scenario: scenario as a pandas Series.

get_mutual_information()

Return xarray with mutual information

get_mutual_information_events(event_nickname1, event_nickname2)

Return xarray with the mutual information between the events event_nickname1 and event_nickname2

get_mutual_information_events_from_df_scenarios(df_scenarios, event_nickname_x, event_nickname_y)

Return mutual information in log10 of the desired events

get_mutual_information_from_df_scenarios(df_scenarios)

Return an xarray with the mutual information calculated from the scenarios dataframe :param df_scenarios: Scenarios with normalized probability. Loaded with self.get_dataframe_scenarios()

get_nicknames_for_gene_segment(strGene)

Return tuple (‘GeneChoice_nickname’, ‘Five_prime_nickname’, ‘Three_prime_nickname’) to be used to construct a scenario sequence with the function get_gene_segment :param strGene: only V, D or J are accepted.

get_observable_from_df_scenarios(observable_function, df_scenarios: pandas.core.frame.DataFrame)

Return a pandas series with the calculated observable over the df_scenarios dataframe. :param observable_function: This function should use the variables with self.realization :param df_scenarios: Scenarios dataframe loaded with self.get_dataframe_scenarios.

get_observable_xarray_from_function(observable_func, variables_tuple_list)

observable_func(x,y,z) variables_tuple_list = [(‘v_choice’, ‘id’), (‘vd_ins’, ‘value’), (‘v_3_del’, ‘value’)]

get_ones_xarray_from_list(strEvents_list: list)

Get xarray with labels and dimensions for strEvents_list :param strEvents_list: list of events nickname. :return: xarray with dimensions and coordinates with one as values.

get_probability_matrix_from_event_list_and_scenarios_dataframe(event_list: list, df_scenarios: pandas.core.frame.DataFrame)

Get probability xarray tensor from IGoR’s scenarios dataframe for a given list of events :param event_list: List of event nicknames. :param df_scenarios: Dataframe with nicknames as headers. :return: xarray of joint probability for event_list calculated from the weighted occurrences in df_scenarios.

get_realization_value_from_df_scenarios(df_scenarios, event_nickname)
get_realizations_dict_from_scenario_dict(scenario_realization_dict: dict)
get_sorted_events_nicknames_list()
get_str_seq_from_ps_scenario(ps_scenario: pandas.core.series.Series)

Return string sequence from an evaluated scenario without errors :param ps_scenario: Pandas Series (row from df_scenarios)

get_zero_xarray_from_list(strEvents_list: list)

Get xarray with labels and dimensions for strEvents_list :param strEvents_list: list of events nickname. :return: xarray with dimensions and coordinates with zero as values.

classmethod load_default(IgorSpecie, IgorChain, modelpath=None, ref_genome_path=None)
Returns

IgorModel loaded with the default location for specie and chain

classmethod load_from_directory(model_files_dir)

return a IgorModel from directory with default names ‘model_parms.txt’ and ‘model_marginals.txt’

classmethod load_from_networkx(IgorSpecie, IgorChain)

:return: IgorModel loaded with the default location for specie and chain

classmethod load_from_parms_marginals_object(mdl_parms: pygor3.IgorIO.IgorModel_Parms, mdl_marginals: Union[None, pygor3.IgorIO.IgorModel_Marginals] = None)

Load IgorModel from IgorModel_Parms and IgorModel_Marginals instances. If IgorModel_Marginals is not provided, a uniform distribution is used for the marginals. :return: IgorModel instance

classmethod load_from_txt(model_parms_file: str, model_marginals_file: Union[None, str] = None)

Load model from txt files (model_parms.txt and model_marginals.txt). If model marginals is not specified, a uniform distribution is loaded.

classmethod make_default_VDJ(df_V_ref_genome, df_D_ref_genome, df_J_ref_genome, lims_deletions=None, lims_insertions=None)

Create a default VDJ model from V, D and J genes dataframes :param df_V_ref_genome: Pandas Dataframe of Genome reference for V gene with CDR3 anchors :param df_D_ref_genome: Pandas Dataframe of Genome reference for D gene :param df_J_ref_genome: Pandas Dataframe of Genome reference for J gene with CDR3 anchors :param lims_deletions: Tuple with min and maximum value for deletions, e.g. (-4,20). Negative numbers are palindromic insertions :param lims_insertions: Tuple with min and maximum value for insertions, e.g. (0,30)

classmethod make_default_VJ(df_V_ref_genome, df_J_ref_genome, lims_deletions=None, lims_insertions=None)

Create a default VJ model from V and J genes dataframes :param lims_deletions: Tuple with min and maximum value for deletions, e.g. (-4,20) :param lims_insertions: Tuple with min and maximum value for insertions, e.g. (0,30)

classmethod make_default_from_Dataframe_dict(genomic_dataframe_dict, lims_deletions=None, lims_insertions=None)
classmethod make_default_model_from_IgorRefGenome(genomes: pygor3.IgorIO.IgorRefGenome)
classmethod make_model_default_VDJ_from_dataframes(df_V_ref_genome: pandas.core.frame.DataFrame, df_D_ref_genome: pandas.core.frame.DataFrame, df_J_ref_genome: pandas.core.frame.DataFrame, lims_deletions=None, lims_insertions=None)

Returns the default IgorModel with a uniform distribution :param df_V_ref_genome:Union[pd.DataFrame], :param df_D_ref_genome:Union[pd.DataFrame], :param df_J_ref_genome:Union[pd.DataFrame] :return: IgorModel object

classmethod make_model_default_VJ_from_dataframes(df_V_ref_genome: pandas.core.frame.DataFrame, df_J_ref_genome: pandas.core.frame.DataFrame, lims_deletions=None, lims_insertions=None)

Returns the default IgorModel with a uniform distribution :param df_V_ref_genome:Union[pd.DataFrame], :param df_J_ref_genome:Union[pd.DataFrame] :return: IgorModel object

plot(event_nickname: str, ax=None)
plot_Bayes_network(ax=None, filename=None)
plot_Event(event_nickname: str, ax=None, **kwargs)
plot_Event_Marginal(event_nickname: str, ax=None, **kwargs)

Plot marginals of model events by nickname

plot_GeneChoice_Pmarginal()
plot_InsertionsDeletions_Pmarginal()
plot_event_Deletion(event_nickname: str, **kwargs)

Return Deletion plot

plot_event_DinucMarkov(event_nickname: str, **kwargs)

Return DinucMarkov plot

plot_event_GeneChoice(event_nickname: str, **kwargs)

Return GeneChoice plot

plot_event_Insertion(event_nickname: str, **kwargs)

Return Insertion plot

static plot_mutual_information(da_mi, ax=None, **kwargs)
plot_recombination_entropy(ax=None, df_entropy=None)
plot_scenario(ps_scenario, nt_lim: Union[None, tuple, list] = None, show_CDR3=True, seq_aligned: Union[None, tuple, list] = None, ax=None)

Return matplotlib fig, ax :param ps_scenario: Pandas Series scenario :param nt_lim:Union[None,tuple,list] region limits to show the scenario alignment default give boundaries around CDR3, if no anchors in model, show the whole scenario :param show_CDR3: Show CDR3 lines default(=True) :param seq_aligned: List, tuple or np.array of size 3 (seq_index, str_seq, offset)

read_model_from_directory(model_files_dir)

Read model from model files directory. :param model_files_dir: Path to model directory.

read_model_from_txt(model_parms_file: str, model_marginals_file: Union[None, str] = None)

Read model from model_parms.txt and model_marginals.txt. :param model_parms_file: Path to model parms txt file. :param model_marginals_file: Path to model marginals txt file.

realization(ps_scenario, event_nickname: str) → pygor3.IgorIO.IgorEvent_realization

Return realization of scenario.

realizations_dict(ps_scenario)

Return Ordered dictionary of realization of scenario.

remove_Edge(parent_nickname: str, child_nickname: str)
scenario_from_database(scenarios_list)
set_genomic_dataframe_dict(dataframe_dict)
set_realization_event_from_DataFrame(event_nickname, new_df)
w_average_function_df_scenarios(observable_func, df_scenarios: pandas.core.frame.DataFrame)

Return average of function weighted with the scenario probabilities

w_covariance_df_scenarios(colname_1: str, colname_2: str, df_scenarios: pandas.core.frame.DataFrame)

Return weighted covariance with the normalized probabilities of the column names given for each scenario (norm_scenario_proba_cond_seq) :param colname_1: column name of df_scenario to calculate the weighted covariance :param colname_2: column name of df_scenario to calculate the weighted covariance :param df_scenarios: Scenarios with normalize probability. Loaded with self.get_dataframe_scenarios()

w_mean_df_scenarios(column_name: str, df_scenarios: pandas.core.frame.DataFrame)

Return weighted mean with the normalized probabilities for each scenario (norm_scenario_proba_cond_seq) :param column_name: column name of df_scenario to calculate the average :param df_scenarios: Scenarios with normalize probability. Loaded with self.get_dataframe_scenarios()

w_variance_df_scenarios(colname_1: str, df_scenarios: pandas.core.frame.DataFrame)

Return weighted variance with the normalized probabilities of the column name given for each scenario (norm_scenario_proba_cond_seq) :param colname_1: column name of df_scenario to calculate the weighted variance :param df_scenarios: Scenarios with normalized probability. Loaded with self.get_dataframe_scenarios()

write_df_scenario_aln_FASTA(fln_scenario_fasta, ps_scenario)

Write a scenario alignment in a fasta file. :param fln_scenario_fasta: Filename to write align scenario :param ps_scenario: Pandas Series scenario

write_mdldata_dir(model_dir_path, sep=';', b_igor_directory=True)

Export IgorModel and IgorRefGenome :param model_dir_path: Directory to save model in IGoR directory structure :param sep: Field separator, default ‘;’ :param b_igor_directory: If True, use the default IGoR directory structure, default True.

write_model(fln_model_parms, fln_model_marginals, fln_V_gene_CDR3_anchors=None, fln_J_gene_CDR3_anchors=None)

Write model parms and marginals(conditional probabilities) in IGoR txt format files. :param fln_model_parms: Filename for model parameters. :param fln_model_marginals: Filename for model marginals (conditional probabilities). :param fln_V_gene_CDR3_anchors: Filename of CDR3 anchors for V gene(optional). :param fln_J_gene_CDR3_anchors: Filename of CDR3 anchors for J gene(optional).

write_ref_genome(fln_genomicVs=None, fln_genomicDs=None, fln_genomicJs=None, fln_V_gene_CDR3_anchors=None, fln_J_gene_CDR3_anchors=None)

Write ref_genome from genomic_dataframe_dict :param fln_genomicVs: V fasta file :param fln_genomicDs: D fasta file :param fln_genomicJs: J fasta file :param fln_V_gene_CDR3_anchors: V csv anchors file :param fln_J_gene_CDR3_anchors: J csv anchors file

write_ref_genome_dir(ref_genome_dir=None)

Write genome references files in directory ref_genome_dir

class pygor3.IgorIO.IgorModel_Marginals(model_marginals_file=None)

Bases: object

Class to get a list of Events directly from the *_parms.txt :param model_marginals_file: Igor marginals file.

initialize_uniform_event_from_model_parms(event_nickname, parms: pygor3.IgorIO.IgorModel_Parms)
initialize_uniform_from_model_parms(parms: pygor3.IgorIO.IgorModel_Parms)

Update with uniform distribution IgorModel_Marginals from IgorModel_Parms object :param parms: IgorModel_Parms object

classmethod make_uniform_from_parms(parms: pygor3.IgorIO.IgorModel_Parms)
read_model_marginals(filename, dim_names=False)

Reads a model marginals file. Returns a tuple containing a dict containing the individual events probabilities indexed by the events nicknames and a dict containing the list of dimension names/ordering for each event.

update_network_dict_from_model_parms(parms: pygor3.IgorIO.IgorModel_Parms)
write_event_probabilities(ofile, event_nickname)
write_model_marginals(filename=None, model_parms=None)
class pygor3.IgorIO.IgorModel_Parms(model_parms_file=None, fln_V_gene_CDR3_anchors=None, fln_J_gene_CDR3_anchors=None)

Bases: object

Class to get a list of Events directly from the *_parms.txt :param model_parms_file: Igor parms file path.

add_Edge(parent_nickname, child_nickname)
attach_J_anchors_from_file(fln_J_gene_CDR3_anchors, sep=';')

Attach J anchors from file :param fln_J_gene_CDR3_anchors: IGoR’s J anchors file

attach_V_anchors_from_Dataframe(df_V_anchors)
attach_V_anchors_from_file(fln_V_gene_CDR3_anchors, sep=';')

Attach V anchors from file :param fln_V_gene_CDR3_anchors: IGoR’s V anchors file

attach_anchors_from_files(fln_V_gene_CDR3_anchors=None, fln_J_gene_CDR3_anchors=None, sep=';')

Add anchors to IgorModel_Parms from file, pandas dataframe or dictionary 1. Get a dataframe from parms.Event_dict

property df_D_ref_genome: Optional[pandas.core.frame.DataFrame]

Return pandas dataframe with annexed anchors if anchors are available.

property df_J_ref_genome: Optional[pandas.core.frame.DataFrame]

Return pandas dataframe with annexed anchors if anchors are available.

property df_V_ref_genome: Optional[pandas.core.frame.DataFrame]

Return pandas dataframe with annexed anchors if anchors are available.

property event_Deletion_list
property event_DinucMarkov_list
property event_GeneChoice_D: Optional[pygor3.IgorIO.IgorRec_Event]

Return IgorRec_Event GeneChoice and D_gene event from self.Event_list (usual nickname ‘d_gene’)

property event_GeneChoice_J: Optional[pygor3.IgorIO.IgorRec_Event]

Return IgorRec_Event GeneChoice and J_gene event from self.Event_list (usual nickname ‘j_choice’)

property event_GeneChoice_V: Optional[pygor3.IgorIO.IgorRec_Event]

Return IgorRec_Event GeneChoice and V_gene event from self.Event_list (usual nickname ‘v_choice’)

property event_GeneChoice_list
property event_Insertion_list
classmethod from_database(db)
classmethod from_network_dict(network_dict: dict)
from_scenario(scenario, strEvent)
genMarginalFile(model_marginals_file=None)
genPreMarginalDF()
gen_EventDict_DataFrame()
gen_NameNickname_dict()
getBayesGraph()
get_Event(event_nickname_or_name, by_nickname=True) → pygor3.IgorIO.IgorRec_Event

Returns the RecEvent with corresponding name or nickname.

get_Event_dependencies(strEvent)
get_Event_list_sorted()
get_Event_realization(event_nickname: str, index: Union[int, list]) → pygor3.IgorIO.IgorRec_Event

Return event realization by event_nickname and index :param event_nickname: Nickname of event to get realization. :param index: Id of realization in event. :return: IgorRec_Event with nickname ‘event_nickname’ and id ‘index’.

get_EventsName_list()
get_EventsNickname_list()
get_IgorRefGenome() → pygor3.IgorIO.IgorRefGenome

Return IgorRefGenome instance from events and df_V_anchors

get_event_dict(str_key, str_value)

Return a python dictionary of the event_dict, like (‘nickname’, ‘priority’) {‘v_choice’:7, ‘d_gene’:6, …}

get_scenario_from_line_CSV(str_line, file_header_list, sep=';')
load_Deletion_realizations_by_nickname(event_nickname: str, limits=(- 4, 20))
load_DinucMarkov_realizations_by_nickname(event_nickname: str)
load_GeneChoice_realizations_by_nickname(event_nickname: str, flnGenomic)
load_Insertion_realizations_by_nickname(event_nickname: str, limits=(0, 24))
classmethod load_default(IgorSpecie, IgorChain, modelpath=None, ref_genome_path=None)

Return IGoR default model parms for species and chain specified.

load_events_from_dict(dicto)
classmethod make_default_VDJ(df_V_ref_genome, df_D_ref_genome, df_J_ref_genome, lims_deletions=None, lims_insertions=None)

Create a default VDJ model from V, D and J genes dataframes :param df_V_ref_genome: Pandas Dataframe of Genome reference for V gene with CDR3 anchors :param df_D_ref_genome: Pandas Dataframe of Genome reference for D gene :param df_J_ref_genome: Pandas Dataframe of Genome reference for J gene with CDR3 anchors :param lims_deletions: Tuple with min and maximum value for deletions, e.g. (-4,20). Negative numbers are palindromic insertions :param lims_insertions: Tuple with min and maximum value for insertions, e.g. (0,30)

classmethod make_default_VDJ_from_IgorRefGenome(ref_genome: pygor3.IgorIO.IgorRefGenome, lims_deletions=None, lims_insertions=None)

Return IgorModel_Parms from IgorRefGenome

classmethod make_default_VJ(df_V_ref_genome, df_J_ref_genome, lims_deletions=None, lims_insertions=None)

Create a default VJ model from V and J genes dataframes :param df_V_ref_genome: Pandas Dataframe of Genome reference for V gene with CDR3 anchors :param df_J_ref_genome: Pandas Dataframe of Genome reference for J gene with CDR3 anchors :param lims_deletions: Tuple with min and maximum value for deletions, e.g. (-4,20). Negative numbers are palindromic insertions :param lims_insertions: Tuple with min and maximum value for insertions, e.g. (0,30)

plot_Graph(ax=None, **kwargs)

Return a plot of the bayesian network

read_Edges(ofile)
read_ErrorRate(ofile)
read_Event_list(ofile)
read_model_parms(filename)

Reads a model graph structure from a model params file. Note that for now this method does not read the error rate information.

realiz_dict_from_scenario(scenario)
remove_Edge(parent_nickname, child_nickname)
set_Edges_from_dict(parents_dict)
set_event_realizations_from_DataFrame(event_nickname, df)

Set realizations of a defined event from a pandas dataframe. :param event_nickname: Event nickname to set the realizations :param df: Pandas dataframe with ‘id’, ‘value’, ‘name’ columns (id as index)

update_events_name()
write_Edges(ofile, delimiter=None)
write_ErrorRate(ofile, delimiter=None)
write_Event_list(ofile, delimiter=None)
write_model_parms(filename=None, sep=';')

Writes a model graph structure from a model params object. Note that for now this method does not read the error rate information.

write_ref_genome_dir(ref_genome_dir_path)
class pygor3.IgorIO.IgorRec_Event(event_type, seq_type, seq_side, priority, nickname)

Bases: object

Recombination event class containing event’s name, type, realizations, etc… Similar to IGoR’s C++ RecEvent class.

add_realization(realization)

Add a realization to the RecEvent realizations list.

export_realizations_to_fasta(flnGenomic)
classmethod from_default_nickname(nickname: str)
classmethod from_dict(dict_IgorRec_Event: dict)

Returns a IgorRec_Event based on dictionary

get_realization(index: Union[int, list]) → pygor3.IgorIO.IgorEvent_realization

Get realization object by index :param index: Id of realization :return: IgorEvent_realization

get_realization_DataFrame()

Return an Event realizations as a pandas DataFrame with id, value and name columns and attributes - event_type - seq_type - seq_side - priority - NOT event name, because conflicts with pandas dataframe name - nickname

get_realization_vector()

This method returns the event realizations sorted by the realization index as a list.

property pd_realizations
set_realization_vector()
set_realization_vector_GeneChoice(flnGenomic: str)

Sets a realization vector from a filename :param flnGenomic: fasta file with the genomic template IMGT or other template.

to_dict()
update_name()

Updates the name of the event (will have no effect if the RecEvent has not been modified since the last call).

update_pd_realizations_from_realizations(realizations: Union[None, list] = None)
update_realizations_from_dataframe(dataframe)

Update realizations with a dataframe (index, value, name)

update_realizations_from_fasta(flnGenomic)
class pygor3.IgorIO.IgorRefGenome(fln_genomicVs: Union[None, str, pathlib.Path] = None, fln_genomicDs: Union[None, str, pathlib.Path] = None, fln_genomicJs: Union[None, str, pathlib.Path] = None, fln_V_gene_CDR3_anchors: Union[None, str, pathlib.Path] = None, fln_J_gene_CDR3_anchors: Union[None, str, pathlib.Path] = None, path_ref_genome: Union[None, str, pathlib.Path] = None)

Bases: object

property D
property J
property V
clean_empty_anchors()

Remove genes without anchors

property df_J_ref_genome

Property that returns J dataframe of genomic templates with anchor’s column if present.

property df_V_ref_genome

Property that returns V dataframe of genomic templates with anchor’s column if present.

property dict_genomicDs
property dict_genomicJs
property dict_genomicVs
get_anchors_dict()
static get_imgt_list_species()
classmethod load_FromSQLRecord_list(sqlrecords_genomicVs=None, sqlrecords_genomicDs=None, sqlrecords_genomicJs=None, sqlrecords_V_gene_CDR3_anchors=None, sqlrecords_J_gene_CDR3_anchors=None)

Return IgorRefGenome from database records.

load_J_anchors_from_file(fln_J_gene_CDR3_anchors, sep=';')

Load CDR3 J anchors dataframe to IgorRefGenome object. :param fln_J_gene_CDR3_anchors: Filename of csv anchors file templates for J gene

classmethod load_VDJ_from_IMGT_website(imgt_species, imgt_chain, **kwargs)

Return IgorRefGenome from IMGT website: :param imgt_species: species in IMGT format :param imgt_chain: chain in IMGT format :param modelspath: (Optional) If specified will not be deleted.

classmethod load_VJ_from_IMGT_website(imgt_species, imgt_chain, **kwargs)

Return IgorRefGenome from IMGT website: :param imgt_species: species in IMGT format :param imgt_chain: chain in IMGT format :param modelspath: (Optional) If specified will not be deleted.

load_V_anchors_from_file(fln_V_gene_CDR3_anchors, sep=';')

Load CDR3 V anchors dataframe (df_V_anchors) to IgorRefGenome object. :param fln_V_gene_CDR3_anchors: Filename of csv anchors file templates for V gene

load_dataframes_from_dict(df_genomics_dict)
load_dataframes_from_ref_genome_files(fln_genomicVs: Union[None, str, pathlib.Path] = None, fln_genomicDs: Union[None, str, pathlib.Path] = None, fln_genomicJs: Union[None, str, pathlib.Path] = None, fln_V_gene_CDR3_anchors: Union[None, str, pathlib.Path] = None, fln_J_gene_CDR3_anchors: Union[None, str, pathlib.Path] = None, sep=';')
classmethod load_default(IgorSpecie, IgorChain, modelpath=None, ref_genome=None)

Return IgorRefGenome

classmethod load_from_dataframe_genomics_dict(df_genomics_dict: dict)

Return IgorRefGenome from a dictionary of genomic dataframes. :param df_genomics_dict: dictionary with ‘V’, ‘J’ and/or ‘D’ keys with pandas dataframes. :return: IgorRefGenome

classmethod load_from_path(path_ref_genome: Union[str, pathlib.Path])

Return IgorRefGenome from directory path with default names: genomicVs.fasta, genomicDs.fasta, genomicJs.fasta, V_gene_CDR3_anchors.csv and J_gene_CDR3_anchors.csv :param path_ref_genome: Path of directory :return : IgorRefGenome

load_genomicDs_from_file(fln_genomicDs)

Load D genes dataframe (df_genomicDs) to IgorRefGenome object. :param fln_genomicDs: Filename of fasta gene templates for D gene

load_genomicJs_from_file(fln_genomicJs)

Load J genes dataframe (df_genomicJs) to IgorRefGenome object. :param fln_genomicJs: Filename of fasta gene templates for J gene

load_genomicVs_from_file(fln_genomicVs)

Load V genes dataframe (df_genomicVs) to IgorRefGenome object. :param fln_genomicVs: Filename of fasta gene templates for V gene

to_dict()
update_fln_names(path_ref_genome: Union[None, str] = None, fln_genomicVs: Union[None, str] = None, fln_genomicDs: Union[None, str] = None, fln_genomicJs: Union[None, str] = None, fln_V_gene_CDR3_anchors: Union[None, str] = None, fln_J_gene_CDR3_anchors: Union[None, str] = None)

Update genomic filenames :param fln_genomicVs: Path of fasta file for V genomic templates, :param fln_genomicDs: Path of fasta file for D genomic templates, :param fln_genomicJs: Path of fasta file for J genomic templates, :param fln_V_gene_CDR3_anchors: Path of csv anchor file for V genes, :param fln_J_gene_CDR3_anchors: Path of csv anchor file for J genes

write_ref_genome(fln_genomicVs: Union[None, str, pathlib.Path] = None, fln_genomicDs: Union[None, str, pathlib.Path] = None, fln_genomicJs: Union[None, str, pathlib.Path] = None, fln_V_gene_CDR3_anchors: Union[None, str, pathlib.Path] = None, fln_J_gene_CDR3_anchors: Union[None, str, pathlib.Path] = None, sep=';')

Save genomes in files :param fln_genomicVs: Output V gene fasta genomic file. :param fln_genomicDs: Output D gene fasta genomic file. :param fln_genomicJs: Output J gene fasta genomic file. :param fln_V_gene_CDR3_anchors: Output csv anchor file for V gene. :param fln_J_gene_CDR3_anchors: Output csv anchor file for J gene.

write_ref_genome_dir(ref_genome_dir_path, sep=';')

Write ref_genome directory in path :param ref_genome_dir_path: Path to directory to save ref_genomic files. :param sep: default = ‘;’ to save anchors files.

class pygor3.IgorIO.IgorScenario(seq_index: Union[None, int] = None, scenario_rank: Union[None, int] = None, scenario_proba_cond_seq: Union[None, int] = None, realizations_ids_dict: Union[None, dict] = None)

Bases: object

export_to_AIRR_line(scenario_col_list: list, sep='\t')
get_scenario_fasta(mdl: pygor3.IgorIO.IgorModel)
classmethod load_FromLineBestScenario(line, delimiter=';')
classmethod load_FromSQLRecord(sqlRecordScenario: list, sql_scenario_name_type_list: list)
classmethod load_from_dict(dicto)
set_model(mdl: pygor3.IgorIO.IgorModel)

Initiate scenario dictionary with a IgorModel

set_model_from_headers(header_line: str)
to_dict()
class pygor3.IgorIO.IgorTask(igor_exec_path=None, igor_datadir=None, igor_models_root_path=None, igor_species=None, igor_chain=None, igor_model_dir_path=None, igor_path_ref_genome=None, fln_genomicVs=None, fln_genomicDs=None, fln_genomicJs=None, fln_V_gene_CDR3_anchors=None, fln_J_gene_CDR3_anchors=None, igor_wd=None, igor_batchname=None, igor_model_parms_file=None, igor_model_marginals_file=None, igor_read_seqs=None, igor_threads=None, igor_fln_indexed_sequences=None, igor_fln_indexed_CDR3=None, igor_fln_align_V_alignments=None, igor_fln_align_D_alignments=None, igor_fln_align_J_alignments=None, igor_fln_infer_final_marginals=None, igor_fln_infer_final_parms=None, igor_fln_evaluate_final_marginals=None, igor_fln_evaluate_final_parms=None, igor_fln_output_pgen=None, igor_fln_output_scenarios=None, igor_fln_output_coverage=None, igor_fln_generated_realizations_werr=None, igor_fln_generated_seqs_werr=None, igor_fln_generation_info=None, igor_fln_db=None, mdl: Union[None, pygor3.IgorIO.IgorModel] = None, genomes: Union[None, pygor3.IgorIO.IgorRefGenome] = None)

Bases: object

This class should encapsulate all the input parameters and output files when IGoR run.

align(input_sequences: Union[None, str, pathlib.Path, pandas.core.frame.DataFrame, numpy.array, list] = None, mdl: Union[None, str, pathlib.Path, pygor3.IgorIO.IgorModel, pygor3.IgorIO.IgorModel_Parms] = None, igor_wd=None, batch_clean=True, b_best_align_only=False, igor_align_dict_opts: Union[None, dict] = None)
create_db(igor_fln_db=None)
db_export_IgorGenomes(igor_path_ref_genome: Union[None, str] = None, igor_model_dir_path: Union[None, str] = None, fln_genomicVs: Union[None, str] = None, fln_genomicDs: Union[None, str] = None, fln_genomicJs: Union[None, str] = None, fln_V_gene_CDR3_anchors: Union[None, str] = None, fln_J_gene_CDR3_anchors: Union[None, str] = None)

Export from database IGoR’s reference genome files :param fln_genomicVs: Path of fasta file with genomic V templates, (default None), :param fln_genomicDs: Path of fasta file with genomic D templates, (default None), :param fln_genomicJs: Path of fasta file with genomic J templates, (default None), :param fln_V_gene_CDR3_anchors: Path of csv file with V gene CDR3 anchors, (default None), :param fln_J_gene_CDR3_anchors: Path of csv file with J gene CDR3 anchors, (default None)

db_export_IgorIndexedSeq(igor_fln_indexed_sequences: Union[None, str] = None)

Export from database IGoR’s indexed_seq files :param igor_fln_indexed_sequences: Path of csv file to save IgorIndexedSeq

db_export_to_igorfiles()
db_get_naive_align_dict_by_seq_index(seq_index)
db_ls()

List igor_db tables

db_plot_naive_align_by_seq_index(seq_index)
db_str_fasta_naive_align_by_seq_index(seq_index)

Given a sequence index and the corresponding VJ/VDJ alignments, return a string considering only the offset.

classmethod default_model(specie, chain, igor_wd=None, model_parms_file=None, model_marginals_file=None, **kwargs)

Return an IgorTask object

evaluate(input_sequences: Union[str, pandas.core.frame.DataFrame, numpy.ndarray, pathlib.Path], N_scenarios=None, mdl: Optional[pygor3.IgorIO.IgorModel] = None, igor_wd=None, igor_batchname=None, clean_batch=True, airr_format=True, db=True, igor_evaluate_dict_options: Union[None, dict] = None)

Return evaluation of sequences

gen_igor_wd()
gen_random_batchname()
generate(N_seqs=None, mdl=None, igor_wd=None, igor_batchname=None, igor_model_parms_file=None, igor_model_marginals_file=None, igor_db=None, igor_fln_db=None, igor_species=None, igor_chain=None, clean_batch=True, return_scenarios=False, return_df=True)

Generate Sequences using IgorTask

get_dataframe_from_fln_generated_realizations_werr(igor_fln_generated_realizations_werr=None, mdl: Union[None, pygor3.IgorIO.IgorModel] = None)
get_dataframe_from_fln_generated_seqs_werr(igor_fln_generated_seqs_werr=None)
get_dataframe_scenarios(igor_fln_output_scenarios: Union[None, str] = None, mdl: Union[None, pygor3.IgorIO.IgorModel] = None)
get_pgen_pd()
infer(input_sequences: Union[None, str, pathlib.Path, pandas.core.frame.DataFrame, numpy.array, list] = None, model: Union[None, str, pathlib.Path, pygor3.IgorIO.IgorModel, pygor3.IgorIO.IgorModel_Parms] = None, igor_wd=None, batch_clean=True)

Run igor infer with new data and model :param input_sequences: Union[None, str, Path, pd.DataFrame, np.array, list] = None :param model: Union[None, str, Path, IgorModel, IgorModel_Parms] = None

load_IgorModel(igor_model_parms_file: Union[None, str] = None, igor_model_marginals_file: Union[None, str] = None, fln_V_gene_CDR3_anchors: Union[None, str] = None, fln_J_gene_CDR3_anchors: Union[None, str] = None)
load_IgorModel_from_infer_files(igor_fln_infer_final_parms: Union[None, str] = None, igor_fln_infer_final_marginals: Union[None, str] = None, fln_V_gene_CDR3_anchors: Union[None, str] = None, fln_J_gene_CDR3_anchors: Union[None, str] = None)

Load IgorModel from inferred model files. :param igor_fln_infer_final_parms: Path of inferred model parms file. :param igor_fln_infer_final_marginals: Path of inferred model marginals file

load_IgorRefGenome(igor_path_ref_genome=None)
load_VDJ_BS_database(flnIgorBSSQL)
load_VDJ_database(flnIgorSQL)
load_db()

Save all batch and models files in a single sqlite database. return: None

load_db_from_alignments()
load_db_from_anchors()

Load anchors from database

load_db_from_bestscenarios(igor_fln_output_scenarios: Union[None, str] = None, mdl: Union[None, pygor3.IgorIO.IgorModel] = None)
load_db_from_genomes()
load_db_from_indexed_cdr3()
load_db_from_indexed_sequences(igor_fln_indexed_sequences=None)

Load indexed_sequences from database :param igor_fln_indexed_sequences: csv file of indexed_sequences, generated with -read_seqs

load_db_from_inferred_model()
load_db_from_models(mdl=None)
load_db_from_pgen()
classmethod load_from_batchname(batchname, wd=None)
load_mdl_from_db(igor_fln_db: Union[None, str] = None, igor_db: Optional[pygor3.IgorSqliteDB.IgorSqliteDB] = None)

Return a IgorModel object in self.mdl from igor_fln_db or igor_db.

make_model_default_VDJ_from_fasta_files(fln_genomicVs: Union[None, str, pathlib.Path] = None, fln_genomicJs: Union[None, str, pathlib.Path] = None, fln_genomicDs: Union[None, str, pathlib.Path] = None)

Make a default VDJ model from files

make_model_default_VDJ_from_genomes_dir(igor_path_ref_genome=None)
make_model_default_VJ_from_genomes_dir(igor_path_ref_genome=None)
parse_scenarios_to_airr(igor_fln_output_scenarios, airr_fln_output_scenarios)
run_align(igor_read_seqs=None, igor_threads=None, igor_align_dict_opts: Union[None, dict] = None)
run_clean_batch()

Clean all files defined with batchname and igor_wd

run_datadir()
run_demo()
run_generate_to_dataframe(N)
run_pgen(igor_read_seqs=None)
run_read_seqs(igor_read_seqs=None)

Run IGoR’s -read_seqs options

run_scenarios(igor_read_seqs=None, N_scenarios=None)
to_dict()
update_batch_filenames(igor_batchname=None, igor_wd=None)
update_batchname(batchname)
update_model_filenames(igor_model_dir_path: Union[None, str] = None, olga_model_dir_path: Union[None, str] = None, igor_models_root_path: Union[None, str] = None)

Update model filenames :param igor_model_dir_path: Directory path for genome templates. If None, the default is igor_model_dir_path = igor_models_root_path + “/models” :param igor_models_root_path: Directory path where the models for the different species and chains are stored. If None, the current value is not changed; the default value is obtained from run_datadir(): “$(igor -getdatadir)/models/”

update_ref_genome(igor_path_ref_genome: Union[None, str] = None, igor_model_dir_path: Union[None, str] = None, genomes: Union[None, pygor3.IgorIO.IgorRefGenome] = None, fln_genomicVs: Union[None, str] = None, fln_genomicDs: Union[None, str] = None, fln_genomicJs: Union[None, str] = None, fln_V_gene_CDR3_anchors: Union[None, str] = None, fln_J_gene_CDR3_anchors: Union[None, str] = None)

Assign names to ref_genome files gene templates and CDR3 anchors :param igor_path_ref_genome: Directory path for genome templates. If None igor_path_ref_genome = igor_model_dir_path + “/ref_genome” :param igor_model_dir_path: Model directory path, used to build igor_path_ref_genome when it is None. :param genomes: Union[None, IgorRefGenome]

write_mdldata_dir(igor_mdldata_dir: Union[None, str, pathlib.Path] = None, mdl: Union[None, pygor3.IgorIO.IgorModel] = None)
pygor3.IgorIO.Model

alias of pygor3.IgorIO.IgorModel

pygor3.IgorIO.RefGenome

alias of pygor3.IgorIO.IgorRefGenome

pygor3.IgorIO.command_from_dict_options(dicto: dict)

Return igor options from dictionary

pygor3.IgorIO.ds_SONIA_model(model_dir_path)
pygor3.IgorIO.evaluate(input_sequences: Union[str, pandas.core.frame.DataFrame, numpy.ndarray, list, tuple, pathlib.Path], mdl: pygor3.IgorIO.IgorModel, N_scenarios=None, igor_wd: Union[None, str, pathlib.Path] = None, airr_format=False, batch_clean=True, use_db=False, fln_output: Union[None, str, pathlib.Path] = None, b_V_offset=False, igor_evaluate_dict_opts: Union[None, dict] = None, igor_align_dict_opts: Union[None, dict] = None)

Evaluate input sequences with provided model :param input_sequences:Union[str, pd.DataFrame, np.ndarray, Path] :param mdl:IgorModel :param N_scenarios: Number of scenarios requested, default returns 1. :param airr_format: True return pandas dataframe as AIRR format :param fln_output: Output filename to save scenarios :param b_V_offset: Flag to return V_offset alignments as pandas dataframe. :param igor_wd: working directory :param batch_clean: Remove all temporary files True by default. :param use_db: Flag to save data in sqlite database. :param igor_evaluate_dict_opts: Additional IGoR options for evaluation. :param igor_align_dict_opts: Additional IGoR options for aligns.

pygor3.IgorIO.evaluate_pgen(input_sequences: Union[str, pandas.core.frame.DataFrame, numpy.ndarray, pathlib.Path], mdl: pygor3.IgorIO.IgorModel, igor_wd=None, batch_clean=True, airr_format=False, pgen_columns: Union[None, list] = None, igor_evaluate_dict_options: Union[None, dict] = None)

Evaluate input sequences with provided model :param input_sequences:Union[str, pd.DataFrame, np.ndarray, Path] :param mdl:IgorModel :param batch_clean: Remove all temporary files True by default.

pygor3.IgorIO.execute_command_generator(cmd)
pygor3.IgorIO.genLabel(strName)

Generation of label for a simple identification of genomic template sequence.

pygor3.IgorIO.generate(Nseqs, mdl: pygor3.IgorIO.IgorModel, igor_wd=None, igor_batchname=None, seed=None, clean_batch=True, return_scenarios=False)

Return pandas dataframe with generated sequences. Only sequences, not scenarios. :param Nseqs: Number of sequences to be generated. :param mdl: IgorModel instance to generate sequences. :param igor_wd: Working directory used to run IGoR to generate sequences. :param seed: Seed to generate sequences. :param clean_batch: If True clean all files used with IGoR, if False keep all files, default True. :param return_scenarios: Flag if True return scenarios dataframe.

pygor3.IgorIO.get_CDR3_VJ_anchors(mdl_ii: pygor3.IgorIO.IgorModel, df_sequences, df_scenarios, df_V_offsets=None)
pygor3.IgorIO.get_IgorModel_from_IgorRefGenome(ref_genome: pygor3.IgorIO.IgorRefGenome)

Return a IgorModel from a IgorRefGenome

pygor3.IgorIO.get_IgorRefGenome_VDJ_from_IMGT(imgt_species, imgt_chain)
pygor3.IgorIO.get_IgorRefGenome_VJ_from_IMGT(imgt_species, imgt_chain)
pygor3.IgorIO.get_default_IgorModel(species, chain)

Return a default IGoR’s model

pygor3.IgorIO.get_imgt_list_species()

Return list of available species in IMGT website

pygor3.IgorIO.get_sonia_input(mdl_ii: pygor3.IgorIO.IgorModel, df_sequences, df_scenarios, df_V_offsets=None)

Return dataframe to use it as input to sonia

pygor3.IgorIO.infer(input_sequences: Union[str, list, pandas.core.frame.DataFrame, numpy.ndarray, pathlib.Path], mdl: pygor3.IgorIO.IgorModel, igor_wd=None, batch_clean=True, return_likelihoods=True, N_iter=None, igor_infer_dict_opts: Union[None, dict] = None, igor_align_dict_opts: Union[None, dict] = None)pygor3.IgorIO.IgorModel

Returns inferred IgorModel from input_sequences with initial model mdl. :param input_sequences: String sequences in list, numpy array, pandas dataframe or file. :param mdl: IgorModel object. :param igor_wd: Working directory use to run IGoR to infer. :param batch_clean: If True clean all files used with IGoR, if false keep all files, default True. :param return_likelihoods: If True return likelihoods pandas dataframe with likelihoods :param N_iter: Number of iterations to infer model. :param igor_infer_dict_opts: Additional IGoR options to pass for inference. :param igor_align_dict_opts: Additional IGoR options to pass for alignment.

pygor3.IgorIO.infer_SONIA_from_directory(mdl_dirname: Union[str, pathlib.Path], data_seqs, num_gen_seqs=200000, reset_gen_seqs=True, custom_model_folder=None, add_error=False, custom_error=None, epochs=10, batch_size=5000, initialize=True, seed=None, validation_split=0.2, monitor=False, verbose=0)

Return SONIA model :param mdl_dirname: Path, str or IgorModel object used to infer the selection model with SONIA :param data_seqs: List of arrays with format [ ‘CDR3_aa’, ‘v_call’, ‘j_call’ ]

pygor3.IgorIO.naive_align(input_sequences: Union[None, str, pathlib.Path, pandas.core.frame.DataFrame, numpy.array, list] = None, mdl: Union[None, str, pathlib.Path, pygor3.IgorIO.IgorModel, pygor3.IgorIO.IgorModel_Parms] = None, igor_wd=None, igor_threads=None, igor_batchname=None, batch_clean=True, igor_align_dict_opts: Union[None, dict] = None)

Return preprocessing with IGoR :param input_sequences: Sequences to align as dataframe, numpy array, list or string. :param mdl: IgorModel :param igor_wd: IGoR working directory :param igor_threads: IGoR number of threads option :param igor_align_dict_opts: Additional options for naive alignment.

pygor3.IgorIO.run_command(cmd)

from http://blog.kagesenshi.org/2008/02/teeing-python-subprocesspopen-output.html

pygor3.IgorIO.run_command_no_output(cmd)

from http://blog.kagesenshi.org/2008/02/teeing-python-subprocesspopen-output.html

pygor3.IgorIO.run_command_print(cmd)
pygor3.IgorIO.save_SONIA_mdl_dirname(qm: sonia.sonia_leftpos_rightpos.SoniaLeftposRightpos, mdl_dirname)

pygor3.IgorIndexedSequence module

Created on Fri Dec 20 12:24:34 2019

@author: alfaceor

class pygor3.IgorIndexedSequence.IgorIndexedSequence

Bases: object

classmethod load_FromCSVline(csvline, delimiter=';')
classmethod load_FromSQLRecord(sqlRecord)
to_dict()

pygor3.IgorIndexedSequencesDB module

class pygor3.IgorIndexedSequencesDB.IgorIndexedSequencesDB

Bases: object

Class to create and load table or database with sequences

classmethod loadDataBase(flnDB)
pygor3.IgorIndexedSequencesDB.create_connection(db_file)
Create a database connection to the SQLite database specified by db_file

Parameters

db_file – database file

Returns

Connection object or None

pygor3.IgorIndexedSequencesDB.create_table(conn, create_table_sql)

create a table from the create_table_sql statement :param conn: Connection object :param create_table_sql: a CREATE TABLE statement :return:

pygor3.IgorIndexedSequencesDB.create_table_sql = '\nCREATE TABLE IF NOT EXISTS IgorIndexedSeq (\n    seq_index integer PRIMARY KEY,\n    sequence text NOT NULL\n);\n'
CREATE TABLE IF NOT EXISTS IgorVGeneTemplate (

vgene_id integer PRIMARY KEY, gene_name text NOT NULL, sequence text NOT NULL,

);

pygor3.IgorIndexedSequencesDB.insert_IgorIndexedSeq_FromCSVline(conn, csvline)

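Insert an indexed sequence into the IgorIndexedSeq table from a csv line :param conn: Connection object :param csvline: csv line with the indexed sequence :return: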

pygor3.IgorSQL module

pygor3.IgorSQL.sqlcmd_ct_BestScenarios(nickname_event_type_list: list)

:param nickname_event_type_list: list of tuples (nickname, event_type) :return: sql command to create BestScenarios table.

pygor3.IgorSQL.sqlcmd_ct_Model_Marginals(event_nickname, list_dependencies: list)
pygor3.IgorSQL.sqlcmd_ct_Model_Marginals_DinucMarkov(event_nickname, list_dependencies: list)

pygor3.IgorSqliteDB module

Created on Thu Oct 3 11:02:28 2019

@author: alfaceor

class pygor3.IgorSqliteDB.IgorSqliteDB(igor_fln_db=None)

Bases: object

Class to create and load table or database with sequences

FIXME_calc_IgorBestScenarios_average_of(scenario_function)
Q_CDR3_Anchors_in_db(strGene)
Q_IgorBestScenarios_in_db()
Q_IgorPgen_in_db()
Q_align_in_db()
Q_align_in_db_by_gene(strGene)
Q_model_in_db()
Q_output_in_db()
Q_ref_genome_in_db()
Q_ref_genome_in_db_by_gene(strGene)
Q_sequences_in_db()
appendList_IgorAlignments_data_By_seq_index(strGene_class, seq_index, alnDataList=None)

Append IgorAlignment_data objects to a list, given the gene class (“V”, “D”, “J”) and the seq_index. :param strGene_class: string to specify the type of gene V, D or J. :param seq_index: IgorIndexedSequences index. :param alnDataList: List of IgorAlignment_data objects to append to.

attach_table_from_db(flnIgorPgen_to_attach)

ATTACH ‘ahorasi.db’ as otherdb; DETACH otherdb;

calc_IgorBestScenarios_average_of(scenario_function, indices_list=None)
calc_IgorBestScenarios_sum_of(scenario_function)
close_db()
connect_db(flnIgorDB=None)

Connect to the database (or create it if it does not exist)

copytable_from_source(tablename_to_copy, fln_source_db)
createSqliteDB(flnIgorDB)

Create a SQLite database with the flnIgorDB sql script.

createSqliteDB_tmp()
classmethod create_db(flnIgorDB)

Connect to the database with the given filename (or create it if it does not exist)

delete_IgorAlignments_Tables()

Method to delete IgorAlignments table.

delete_IgorBestScenarios_Tables()
delete_IgorGeneAnchors_Tables()

Method to delete IgorGeneAnchors tables.

delete_IgorGeneTemplate_Tables()

Method to delete IgorGeneTemplate tables.

delete_IgorIndexedCDR3_Tables()

Method to delete IgorIndexedCDR3 table.

delete_IgorIndexedSeq_Tables()

Method to delete IgorIndexedSeq table.

delete_IgorModel_Marginals_Tables()
delete_IgorModel_Parms_Tables()
delete_IgorModel_Tables()
delete_IgorPgen_Tables()
execute_query(str_query)

Execute sql script in the SQLite database .

execute_select_query(str_query)

Execute sql script in the SQLite database .

execute_select_query_fetchone(str_query)

Execute sql script in the SQLite database .

executescript(cur, str_query)
export_IgorBestScenarios_to_AIRR(flnAIRR_arrangement, mdl=None, sep='\t')

Export Igor best scenarios to airr rearrangement format

fetch_AIRR_arrangement_By_seq_index(seq_index, mdl=None)
fetch_IgorAlignments_By_seq_index(strGene, seq_index, limit=None)

Fetch IgorAlignments from database by seq_index. :param strGene: string to specify the type of gene V, D or J :param seq_index: IgorIndexedSequences index

fetch_IgorAlignments_By_seq_index_and_gene_name(strGene, seq_index, gene_name, limit=None)
fetch_IgorBestScenarios_By_events_dict(events_id_list_dict)
fetch_IgorBestScenarios_By_seq_index(seq_index)
fetch_IgorGeneAnchors_By_Gene(strGene)
fetch_IgorGeneTemplate_By_gene_id(strGene, gene_id)

Fetch Gene templates in database from fasta files used by IGoR. :param strGene: string to specify the type of gene V, D or J :param gene_id:

fetch_IgorGeneTemplate_By_gene_name(strGene, gene_name)

Fetch Gene templates in database from fasta files used by IGoR. :param strGene: string to specify the type of gene V, D or J :param gene_name: name of the gene template

fetch_IgorGenomicData_By_Gene(strGene)
fetch_IgorIndexedCDR3_By_seq_index(seq_index)
fetch_IgorIndexedSeq_By_seq_index(seq_index)

Fetch seq_index and sequence in Igor database. :param seq_index: IgorIndexedSequences index :return:

fetch_IgorIndexedSeq_By_seq_indexList(seq_indexList)

Fetch seq_index and sequence in Igor database. :param seq_indexList: list of IgorIndexedSequences indexes :return:

fetch_IgorIndexedSeq_indexes()
fetch_IgorIndexedSeq_records()

Fetch all seq_index and sequence records from Igor database. :return:

fetch_IgorPgen()
fetch_IgorPgen_By_seq_index(seq_index)
fetch_best_IgorAlignments_By_seq_index(strGene, seq_index)

Fetch IgorAlignments from database by seq_index. :param strGene: string to specify the type of gene V, D or J :param seq_index: IgorIndexedSequences index

gen_IgorBestScenarios_cols_list()
get_DataFrame_IgorAlignment_By_seq_index(strGene, seq_index, limit=None)
get_Edges()
get_ErrorRate_dict()
get_Event_list()
get_IgorAlignment_data_list_By_seq_index(strGene, seq_index, limit=None)
get_IgorAlignment_data_list_query(strGene, seq_index, where=None)
get_IgorBestScenariosDataframe_By_seq_index(seq_index)
get_IgorBestScenarios_By_seq_index(seq_index)
get_IgorBestScenarios_By_seq_index_IgorModel(seq_index, mdl)
get_IgorGenomicDataFrame_dict()

return dataframes genomic_data

get_IgorIndexedSeq_By_seq_index(seq_index)
get_IgorModel()
get_IgorModel_Marginals()
get_IgorModel_Parms()
get_best_IgorAlignment_data_By_seq_index(strGene, seq_index)
get_columns_type_of_tables(tablename)
get_dict_of_Igortablename_sql()
get_list_of_tables_with_name(table_name_pattern)
get_naive_sequence_from_IgorAligment_data(seq_index)
get_table_colsname_list(tablename)
insert_IgorAlignments_FromCSVline(cur, strGene, csvline)

Insert IGoR Alignments on Database flnIgorDB :param strGene: string to specify the type of gene V, D or J :param csvline:

insert_IgorBestScenarios_FromCSVline(cur, csvline)
insert_IgorEvent_realization_FromDict(cur, event_table, event_realization_dict)
insert_IgorGeneAnchors_FromCSVline(strGene, cur, csvline)

Insert IGoR gene CDR3 anchors in Database flnIgorDB :param strGene: string to specify the type of gene :param csvline:

insert_IgorGeneTemplate_FromBioRecord(strGene, gene_id, bioRecord)

Insert IGoR Gene template in Database flnIgorDB :param strGene: string to specify the type of gene V, D or J :param gene_id: id to identify the gene template :param bioRecord: Biopython record of the inserted sequence

insert_IgorIndexedCDR3_FromCSVline(cur, csvline)

Insert IGoR indexed_CDR3 on Database flnIgorDB :param csvline:

insert_IgorIndexedSeq_FromCSVline(cur, csvline)

Insert IGoR indexed_sequences on Database flnIgorDB :param csvline:

insert_IgorModel_Marginals_FromDict(cur, event_table, event_realization_dict)
insert_IgorRec_Event_FromDict(cur, event_dict: dict)
insert_load_IgorPgen_FromCSVline(cur, csvline)
list_from_db()
load_IgorAlignments_FromCSV(strGene, flnAlignments)

Insert alignments in database from csv alignment files used by IGoR. :param strGene: string to specify the type of gene V, D or J :param flnAlignments: csv alignments file

load_IgorBestScenarios_FromCSV(flnIgorBestScenarios, mdl=None)
load_IgorGeneAnchors_FromCSV(strGene, flnGeneAnchors)
load_IgorGeneTemplate_FromFASTA(strGene, flnGeneTemplate)

Insert Gene templates in database from fasta files used by IGoR. :param strGene: string to specify the type of gene V, D or J :param flnGeneTemplate: Fasta file

load_IgorIndexedCDR3_FromCSV(flnIgorIndexedCDR3)

Insert indexed CDR3 in database from csv igor indexed_CDR3 file. :param flnIgorIndexedCDR3: csv file of indexed CDR3 :return:

load_IgorIndexedSeq_FromCSV(flnIgorIndexedSeq)

Insert indexed sequence in database from csv igor indexed_seqs file. :param flnIgorIndexedSeq: csv file of indexed sequences :return:

load_IgorModel(mdl)
load_IgorModel_FromTXT(flnIgorModel_Parms, flnIgorModel_Marginals)
load_IgorModel_Marginals(mdl_xdata: dict)
load_IgorModel_Parms(mdl_parms)
load_IgorPgen_FromCSV(flnIgorPgen)
load_VDJ_Database(flnIgorIndexedSeq, flnVGeneTemplate, flnDGeneTemplate, flnJGeneTemplate, flnVAlignments, flnDAlignments, flnJAlignments)
load_db(**kwargs)

Return a parameter

write_IgorAlignments2Fasta(alnDataList)
write_IgorAlignments_to_CSV(strGene, flnGeneTemplate, sep=';')
write_IgorBestScenarios_to_CSV(flnIgorBestScenarios, mdl=None, sep=';')

seq_index;scenario_rank;scenario_proba_cond_seq;GeneChoice_V_gene_Undefined_side_prio7_size168;GeneChoice_J_gene_Undefined_side_prio7_size16;GeneChoice_D_gene_Undefined_side_prio6_size3;Deletion_V_gene_Three_prime_prio5_size21;Deletion_D_gene_Five_prime_prio5_size21;Deletion_D_gene_Three_prime_prio5_size21;Deletion_J_gene_Five_prime_prio5_size21;Insertion_VD_genes_Undefined_side_prio4_size41;DinucMarkov_VD_genes_Undefined_side_prio3_size16;Insertion_DJ_gene_Undefined_side_prio2_size41;DinucMarkov_DJ_gene_Undefined_side_prio1_size16;Mismatches 99;1;0.0401246;(109);(12);(2);(9);(16);(4);(7);(5);(2,2,0,3,0);(5);(0,2,0,3,3);() 99;2;0.0401246;(109);(12);(0);(9);(9);(7);(7);(5);(2,2,0,3,0);(5);(0,2,0,3,3);() 99;3;0.0100351;(109);(12);(2);(10);(13);(7);(7);(0);();(11);(0,2,0,3,3,2,2,2,0,0,3);() 99;4;0.0100351;(109);(12);(2);(9);(14);(7);(7);(0);();(11);(0,2,0,3,3,2,2,2,0,0,3);()

write_IgorGeneAnchors_to_CSV(strGene: str, flnGeneAnchors, sep=';')

Export Gene anchors to csv IGoR file :param strGene: Gene letter

write_IgorGeneTemplate_to_fasta(strGene, flnGeneTemplate, sep=';')
write_IgorIndexedCDR3_to_CSV(flnIgorIndexedCDR3, sep=';')
write_IgorIndexedSeq_to_CSV(flnIgorIndexedSeq, sep=';')
write_IgorModel_Marginals_to_TXT(flnIgorModel_Marginals)
write_IgorModel_Parms_to_TXT(flnIgorModel_Parms)
write_IgorModel_to_TXT(flnIgorModel_Parms, flnIgorModel_Marginals)
write_IgorPgen_to_CSV(flnIgorPgen, sep=';')

pygor3.IgorSqliteDBBestScenarios module

Created on Wed Oct 9 16:52:51 2019

@author: alfaceor

class pygor3.IgorSqliteDBBestScenarios.IgorSqliteDBBestScenariosVDJ(flnIgorSQLBestScenarios)

Bases: object

Class to create and load table or database with sequences

createSqliteDB(flnIgorDB)

Create a SQLite database with the flnIgorDB sql script.

fetch_IgorBestScenariosVDJ_By_seq_index(seq_index)

Fetch best scenarios in Igor database by seq_index. :param seq_index: IgorIndexedSequences index :return:

getDJ_Region(seq_id, pdSelected)
getD_Region(seq_id, pdSelected)
getJ_Region(seq_id, pdSelected)
getVD_Region(seq_id, pdSelected)
getV_Region(seq_id, pdSelected)
get_BestScenariosDataFrame(flnBestScenarios, flnParms, flnMarginals)
get_BestScenariosNaiveSeq(seq_id, pdBestScen)
insert_IgorBestScenariosVDJ_FromCSVline(cur, csvline)

Insert IGoR best scenarios on Database flnIgorDB :param csvline:

load_IgorBestScenariosVDJ_FromCSV(flnIgorBestScenarios)

Insert best scenarios in database from csv file. :param flnIgorBestScenarios: :return:

setAlignsFile(flnAligns)
setInputSeqsFile(flnInputSeqs)
setModel(mdl)
setModelFromFiles(model_parms_file=None, model_marginals_file=None)

pygor3.config module

class pygor3.config.RcParams(*args, **kwargs)

Bases: dict

pygor3.config.create_config_files()
pygor3.config.load_config_files()

pygor3.imgt module

pygor3.imgt.download_Jgene_anchors(specie: str, chain: str, flnJGenome, ref_genes_path=None, modelspath=None, imgt_genedb='http://www.imgt.org/genedb/GENElect?')
pygor3.imgt.download_Vgene_anchors(specie: str, chain: str, flnVGenome, ref_genes_path=None, modelspath=None, imgt_genedb='http://www.imgt.org/genedb/GENElect?', sep=';')
pygor3.imgt.download_Vgene_anchors_bk(specie: str, chain: str, modelspath=None, imgt_genedb='http://www.imgt.org/genedb/GENElect?')
pygor3.imgt.download_gene_template(specie: str, gene: str, ref_genes_path=None, modelspath=None, filename=None, imgt_genedb='http://www.imgt.org/genedb/GENElect?')

Create a file in IGoR default format and return the output filename. :param specie: IMGT species nomenclature; use “+” instead of a space “ ”, e.g. Mus+musculus :type specie: str :param gene: IMGT gene nomenclature like TRAV, IGHJ, etc :type gene: str :param modelspath: root paths for all models. :type modelspath: str, optional :param filename: gene template fasta filename. :type filename: str, optional :param imgt_genedb: Url of IMGT GeneDB web application. :type imgt_genedb: str

pygor3.imgt.download_genes_anchors(specie: str, chain: str, flnVGenome, flnJGenome, ref_genes_path=None, modelspath=None, imgt_genedb='http://www.imgt.org/genedb/GENElect?')

Download gene anchors from IMGT website in a IGoR structure directory (modelspath+”/”+specie+”/”+chain)

pygor3.imgt.download_ref_genome(species: str, chain: str, dropna=False, **kwargs)

Returns dictionary with genomic templates V(D)J :param species: IMGT species check get_species_list() to get list of possible names :param chain: IMGT chain, if [‘TRB’, ‘IGH’] returns VDJ dict and VJ for [‘TRA’, ‘IGL’]. :param dropna: Drop NaN in dataframes with new ids :return: genomic templates dictionary

pygor3.imgt.download_ref_genome_VDJ(species: str, chain: str, dropna=False, **kwargs)

Return a dictionary with genomics dataframes and also save files in a IGoR directory structure. :param species: IMGT species name :param chain: IMGT chain receptor name :param dropna: Remove rows with no defined values in any column :return : dictionary of pandas DataFrame with

pygor3.imgt.download_ref_genome_VJ(species: str, chain: str, dropna=False, **kwargs)

Download gene templates and anchors from IMGT and creates different files with original and short names.

Parameters
  • species (str) – Species name in IMGT nomenclature,

  • chain (str) – Chain name in IMGT nomenclature,

  • modelspath (str, optional) – root paths for all models.

  • filename (str, optional) – gene template fasta filename.

  • imgt_genedb (str) – Url of IMGT GeneDB web application.

  • dropna – Remove rows with no defined values in any column

pygor3.imgt.extract_fasta_from_url(url)

Extract fasta information from the typical IMGT format

pygor3.imgt.genAnchDict(url)
pygor3.imgt.genKey(seqDescription)

:param seqDescription : is the header description in fasta file :return key : a generated key to create a dictionary :return startPos : the start position of the sequence based on the accession number sequence.

pygor3.imgt.gen_short_names(flnGenome, flnAnchors=None)
pygor3.imgt.getFunction(seqDescription)

Return Functionality

pygor3.imgt.getStartPos(seqDescription)

Return the start position of sequence based on the fasta file description

pygor3.imgt.get_dict_from_imgt_description(str_description)

Return an OrderedDict with the following fields. The IMGT FASTA header of nucleotide IMGT reference sequences contains 15 fields separated by ‘|’: 1. IMGT/LIGM-DB accession number(s) 2. IMGT gene and allele name 3. species 4. IMGT allele functionality 5. exon(s), region name(s), or extracted label(s) 6. start and end positions in the IMGT/LIGM-DB accession number(s) 7. number of nucleotides in the IMGT/LIGM-DB accession number(s) 8. codon start, or ‘NR’ (not relevant) for non coding labels 9. +n: number of nucleotides (nt) added in 5’ compared to the corresponding label extracted from IMGT/LIGM-DB 10. +n or -n: number of nucleotides (nt) added or removed in 3’ compared to the corresponding label extracted from IMGT/LIGM-DB 11. +n, -n, and/or nS: number of added, deleted, and/or substituted nucleotides to correct sequencing errors, or ‘not corrected’ if non corrected sequencing errors 12. number of amino acids (AA): this field indicates that the sequence is in amino acids 13. number of characters in the sequence: nt (or AA)+IMGT gaps=total 14. partial (if it is) 15. reverse complementary (if it is)

pygor3.imgt.get_gene_anchors(specie: str, gene: str, imgtlabel: str, modelspath=None, filename=None, imgt_genedb='http://www.imgt.org/genedb/GENElect?')

Download genomic anchors for V or J according to specie and gene from imgt database :return : string of the requested link

pygor3.imgt.get_gene_template(specie: str, gene: str, modelspath=None, filename=None, imgt_genedb='http://www.imgt.org/genedb/GENElect?')

Download genomic template according to specie and gene from imgt database

Parameters
  • specie (str) – Species name in IMGT nomenclature.

  • gene – IMGT gene nomenclature like TRAV, IGHJ, etc.

:return : string of the requested link

pygor3.imgt.get_genedb_query72(specie: str, gene: str, imgt_genedb='http://www.imgt.org/genedb/GENElect?')

Returns imgt link to download genomic template according to imgt database. :param specie: imgt specie name check get_species_list() to get the names of species in IMGT DB. :param gene: imgt nomenclature for gene. :return : string of the requested link

pygor3.imgt.get_genedb_query81_imgtlabel(specie: str, gene: str, imgtlabel: str, imgt_genedb='http://www.imgt.org/genedb/GENElect?')

Returns imgt link to download genomic template according to imgt database with the corresponding IMGTlabel :return : string of the requested link

pygor3.imgt.get_records_list(url)

:return : a list of Sequence records.

pygor3.imgt.get_species_list()

Returns list of all species available in the IMGT database. :return : species list to make queries to IMGT GENEDB.

pygor3.imgt.load_records_from_fasta(filename: str)
pygor3.imgt.make_VDJ_model()
pygor3.imgt.save_records2fasta(records, filename: str)

pygor3.utils module

pygor3.utils.DEV_from_df_scenario_aln_to_da_scenario_aln_complete_genes(df_scenario_aln, dict_id_2_nt: Union[None, dict] = None)

Return a numpy array with the dict_id_2_nt dictionary :param df_scenario_aln: IgorModel.get_df_scenario_aln_from_scenario(ps_scenario) output for a scenario. :param dict_id_2_nt: Default dictionary {-1: ‘-’, 0: ‘A’, 1: ‘C’, 2: ‘G’, 3: ‘T’} :return : numpy array with the values defined from dictionary ready to plot using imshow, matplot, etc.

class pygor3.utils.GeneSegment

Bases: object

class pygor3.utils.InsertSegment

Bases: object

pygor3.utils.dna_complementary(str_seq)
pygor3.utils.dna_translate(str_seq)
pygor3.utils.from_df_scenario_aln_to_da_scenario_aln(df_scenario_aln, dict_id_2_nt: Union[None, dict] = None)

Return a numpy array with the dict_id_2_nt dictionary :param df_scenario_aln: IgorModel.get_df_scenario_aln_from_scenario(ps_scenario) output for a scenario. :param dict_id_2_nt: Default dictionary {-1: ‘-’, 0: ‘A’, 1: ‘C’, 2: ‘G’, 3: ‘T’} :return : numpy array with the values defined from dictionary ready to plot using imshow, matplot, etc.

pygor3.utils.get_D_KL_from_xarray(da_P_X_Y, da_P_X, da_P_Y)

base 10 : Mutual information of I_matrix = xr.apply_ufunc(func_D_KL, P_X_Y, P_X, P_Y) return I_matrix.sum()

pygor3.utils.get_P_stationary_state_dinucl(da_dinucl)

Get stationary state probability from dataarray dinucleotide

pygor3.utils.get_aln_scenario_np_from_da_scenario_aln(df_scenario_aln, dict_id_2_nt: Union[None, dict] = None)
pygor3.utils.get_anchors_dataframe_from_csv(fln_csv, sep=';')
pygor3.utils.get_colors(seqs)

make colors for bases in sequence

pygor3.utils.get_da_clustalw_align_from(np_description, np_str_sequence)

Return xarray DataArray in numerical convention gap : -1, A : 0, C : 1, G : 2, T : 3 :param np_description: numpy array with description of sequence. :param np_str_sequence: numpy array with string of sequence. :return : xarray DataArray of sequences aligned with clustalw.

pygor3.utils.get_dataframe_from_fasta(fln_fasta)

Return dataframe from fasta file :param fln_fasta: Fasta filename.

pygor3.utils.get_dataframe_from_fasta_and_csv_anchors(fln_fasta, fln_anchor_csv=None, sep=';')
Parameters
  • fln_fasta – Path of fasta file with genomic templates

  • fln_anchor_csv – Path of csv file with anchors, default csv separator ‘;’.

  • sep – separator for csv file. Default ‘;’

return genomic dataframe with genomic templates with anchors position if provided.

pygor3.utils.get_dataframe_from_fln_generated_seqs_werr(igor_fln_generated_seqs_werr)
pygor3.utils.get_dataframe_with_ref_genome_column_names(df_ref_genome: pandas.core.frame.DataFrame)
pygor3.utils.get_default_fln_dict_ref_genomes_species_chain(IgorSpecie: str, IgorChain: str, modelspath=None, ref_genome_path=None)

Return a dictionary with the paths of the genomic references ref_genome files. :param IgorSpecie: Species directory name in IGoR’s directory structure :param IgorChain: Chain directory name in IGoR’s directory structure :return: dictionary with the default names and paths for IGoR.

pygor3.utils.get_default_fln_names_for_model_dir(model_dir_path, ref_genome_path=None, models_path=None)

Return a dict with default names for files :param model_dir_path: Root of species chain directory. Example model_dir_path=”human/tcr_beta/”

pygor3.utils.get_default_models_fln_paths(models_path='models')

Get default filenames for models directory. :param models_path: models directory name

pygor3.utils.get_default_models_paths_species_chain(IgorSpecie, IgorChain, modelpath=None)

:return IgorModel loaded with the default location for specie and chain

pygor3.utils.get_default_ref_genome_fln_paths(ref_genome_path='ref_genome')

Get default filenames for genome template references. :param ref_genome_path: Default ref_genome directory name

pygor3.utils.get_df_anchors_from_df_ref_genome(df_ref_genome)
pygor3.utils.get_df_normalize_prob(df_scenarios)

Get a series with the normalized probabilities using scenario_proba_cond_seq

pygor3.utils.get_df_order_cols_ref_genome(df_all: pandas.core.frame.DataFrame)
pygor3.utils.get_df_pgen(fln_pgen)
pygor3.utils.get_gene_segment(str_gene_template, int_gene_5_del=None, int_gene_3_del=None)
pygor3.utils.get_join_genomics_anchors_dataframes(df_genes_templates, df_genes_anchors)
pygor3.utils.get_ref_genome_dataframe_from(df_genomic: pandas.core.frame.DataFrame, df_anchors: Optional[pandas.core.frame.DataFrame] = None, sep=';')
pygor3.utils.make_igor_directories(gene: str, specie: str, modelspath=None)

Make directories for all models path root gene species :param gene: Gene name :param specie: species

pygor3.utils.np_seq_to_str_seq(np_seq, dict_id_2_nt: Union[None, dict] = None)
pygor3.utils.plot_np_aln_sequences(aln_scenario_np, ax=None)
pygor3.utils.plot_scenario_from_da_scenario_aln(da_scenario_aln, nt_lim: Union[None, tuple, list] = None, show_CDR3=True, ax=None)

Returns a fig and ax with the recombination scenario :param da_scenario_aln: Xarray DataArray with the scenario alignment matrix in convention to plot

pygor3.utils.run_get_igor_datadir()

Return IGoR default data dir (default models and demo data) path

pygor3.utils.run_get_igor_exec_path()

Return IGoR executable path

pygor3.utils.run_get_igor_wd()

Return the current directory, which can be used as the default working directory (wd)

pygor3.utils.run_get_random_string()

Return random string using subprocess

pygor3.utils.str_seq_to_np_seq(str_seq, dict_nt_2_id: Union[None, dict] = None)

Return a numpy array with the mapping values defined in dict_nt_2_id :param str_seq: String of nucleotide sequence. :param dict_nt_2_id: Dictionary with defined transformation of nucleotides (default Igor_dict_nt_2_id)

pygor3.utils.ufunc_log_pxy_over_px_py(p_xy, p_x, p_y)
pygor3.utils.view_alignment(aln, fontsize='9pt', plot_width=800)

Bokeh sequence alignment view

pygor3.utils.write_geneanchors_dataframe_to_csv(fln_anchor: Union[str, pathlib.Path, TextIO], df_ref_genome, sep=';')

Write gene anchors in csv file from a ref_genome dataframe :param fln_anchor: csv output filename. :param df_ref_genome: Pandas dataframe with columns ‘name’ for description and ‘value’ for sequence.

pygor3.utils.write_genetemplate_dataframe_to_fasta(fln_fasta: Union[str, pathlib.Path, TextIO], df_genomic)

Write dataframe to fasta file :param fln_fasta: Fasta output filename. :param df_genomic: Pandas dataframe with columns ‘name’ for description and ‘value’ for sequence.

pygor3.utils.write_ref_genome_files_from_dataframe(df_Gene_ref_genome, fln_fasta, fln_anchor=None, sep=';')

Write ref genome files from dataframe :param df_Gene_ref_genome: DataFrame with ref_genome. :param fln_fasta: Output fasta filename. :param fln_anchor: Output anchor filename. :param sep: Field sep for anchor file.

pygor3.utils.write_sequences_to_file(sequences: Union[pandas.core.frame.DataFrame, numpy.ndarray, list, str, tuple], fln_sequences: Union[str, pathlib.Path, TextIO], sep=';')

Write sequence to csv file from a dataframe, numpy array, list or single sequence. :param sequences: Sequences to write in a csv file. :param fln_sequences: CSV filename to output sequences.

pygor3.utils_pre_processing module

Module contents