Source code for phenotrex.structure.records
#
# Created by Lukas Lüftinger on 2/5/19.
#
from typing import List, Optional
from dataclasses import dataclass
[docs]@dataclass
class GenotypeRecord:
"""
Genomic features of a sample referenced by `identifier`.
"""
identifier: str
features: List[str]
def __repr__(self):
return f"{self.identifier} n_features={len(self.features)}"
[docs]@dataclass
class PhenotypeRecord:
"""
Ground truth labels of sample `identifier`,
indicating presence/absence of trait `trait_name`:
- 0 if trait is absent
- 1 if trait is present
"""
identifier: str
trait_name: str
trait_sign: int
def __repr__(self):
return f"{self.identifier} trait({self.trait_name})={self.trait_sign}"
[docs]@dataclass
class GroupRecord:
"""
Group label of sample `identifier`.
Notes
-----
Useful for leave-one-group-out cross-validation (LOGO-CV),
for example, to take taxonomy into account.
"""
identifier: str
group_name: Optional[str]
group_id: Optional[int]
def __repr__(self):
return f"{self.identifier} group({self.group_name})={self.group_id}"
[docs]@dataclass
class TrainingRecord(GenotypeRecord, PhenotypeRecord, GroupRecord):
"""
Sample containing Genotype-, Phenotype- and GroupRecords,
suitable as machine learning input for a single observation.
"""
def __repr__(self):
gr_repr = GenotypeRecord.__repr__(self).split(' ')[1]
pr_repr = PhenotypeRecord.__repr__(self).split(' ')[1]
if self.group_name is not None and self.group_id is not None:
gro_repr = GroupRecord.__repr__(self).split(' ')[1]
else:
gro_repr = ''
return f"id={self.identifier} {' '.join([gr_repr, pr_repr, gro_repr])}"