1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
|
def transpose(sequences: List[str]) -> Generator[str, None, None]:
"""
Transposes a list of sequences.
Args:
sequences (List[str]): A list of sequences.
Yields:
str: Transposed sequences.
Examples:
>>> sequences = ["ATCG", "GTCA"]
>>> for transposed in transpose(sequences):
... print(transposed)
AG
TC
CG
A
"""
for _transpose in zip(*sequences):
yield _transpose
def nucleotide_counter(sequences: List[str]) -> Generator[Counter, None, None]:
"""
Counts the occurrences of nucleotides in a list of sequences.
Args:
sequences (List[str]): A list of sequences.
Yields:
Counter: A counter object containing the count of nucleotides.
Examples:
>>> sequences = ["ATCG", "GTCA"]
>>> counters = nucleotide_counter(sequences)
>>> for counter in counters:
... print(counter)
Counter({"A": 1, "G": 1})
Counter({"T": 1, "C": 1})
Counter({"C": 1, "G": 1})
Counter({"A": 1})
"""
return map(Counter, transpose(sequences))
def consensus(sequences: List[str]) -> str:
"""
Finds the consensus sequence and count matrix from a list of sequences.
Args:
sequences (List[str]): A list of sequences.
Returns:
str: The consensus sequence.
List[Tuple[int, int, int, int]]: The count matrix containing the occurrences of nucleotides.
Examples:
>>> sequences = ["ATCG", "GTCA"]
>>> consensus_seq, count_matrix = consensus(sequences)
>>> print(consensus_seq)
ATCG
>>> print(count_matrix)
[(1, 0, 0, 0), (0, 0, 1, 1), (0, 1, 0, 0), (0, 0, 1, 1)]
"""
def _consensus(counters) -> Generator[str, None, None]:
for count in counters:
yield count.most_common(1)[0][0]
def _count_matrix(counters: List[Counter]) -> Generator[Tuple[int, int, int, int], None, None]:
for counter in counters:
a = counter.get("A", 0)
c = counter.get("C", 0)
g = counter.get("G", 0)
t = counter.get("T", 0)
yield (a, c, g, t)
counters = tuple(nucleotide_counter(sequences))
consensus_sequence = "".join(_consensus(counters))
count_matrix = list(transpose(_count_matrix(counters)))
return consensus_sequence, count_matrix
|