Stim/glue/sample/src/sinter/_data/_task_stats.py at eeff06335cc002c86b582e7d44889b75d48fcc37 · quantumlib/Stim · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
import collections
import dataclasses
from typing import Counter, List, Any
from typing import Optional
from typing import Union
from typing import overload
import numpy as np
from sinter._data._anon_task_stats import AnonTaskStats
from sinter._data._csv_out import csv_line


def _is_equal_json_values(json1: Any, json2: Any):
    if json1 == json2:
        return True

    if type(json1) == type(json2):
        if isinstance(json1, dict):
            return json1.keys() == json2.keys() and all(_is_equal_json_values(json1[k], json2[k]) for k in json1.keys())
        elif isinstance(json1, (list, tuple)):
            return len(json1) == len(json2) and all(_is_equal_json_values(a, b) for a, b in zip(json1, json2))
    elif isinstance(json1, (list, tuple)) and isinstance(json2, (list, tuple)):
        return _is_equal_json_values(tuple(json1), tuple(json2))

    return False


@dataclasses.dataclass(frozen=True)
class TaskStats:
    """Statistics sampled from a task.

    The rows in the CSV files produced by sinter correspond to instances of
    `sinter.TaskStats`. For example, a row can be produced by printing a
    `sinter.TaskStats`.

    Attributes:
        strong_id: The cryptographically unique identifier of the task, from
            `sinter.Task.strong_id()`.
        decoder: The name of the decoder that was used to decode the task.
            Errors are counted when this decoder made a wrong prediction.
        json_metadata: A JSON-encodable value (such as a dictionary from strings
            to integers) that were included with the task in order to describe
            what the task was. This value can be a huge variety of things, but
            typically it will be a dictionary with fields such as 'd' for the
            code distance.
        shots: Number of times the task was sampled.
        errors: Number of times a sample resulted in an error.
        discards: Number of times a sample resulted in a discard. Note that
            discarded a task is not an error.
        seconds: The amount of CPU core time spent sampling the tasks, in
            seconds.
        custom_counts: A counter mapping string keys to integer values. Used for
            tracking arbitrary values, such as per-observable error counts or
            the number of times detectors fired. The meaning of the information
            in the counts is not specified; the only requirement is that it
            should be correct to add each key's counts when merging statistics.

            Although this field is an editable object, it's invalid to edit the
            counter after the stats object is initialized.
    """

    # Information describing the problem that was sampled.
    strong_id: str
    decoder: str
    json_metadata: Any

    # Information describing the results of sampling.
    shots: int = 0
    errors: int = 0
    discards: int = 0
    seconds: float = 0
    custom_counts: Counter[str] = dataclasses.field(default_factory=collections.Counter)

    def __post_init__(self):
        assert isinstance(self.errors, (int, np.integer))
        assert isinstance(self.shots, (int, np.integer))
        assert isinstance(self.discards, (int, np.integer))
        assert isinstance(self.seconds, (int, float, np.integer, np.floating))
        assert isinstance(self.custom_counts, collections.Counter)
        assert isinstance(self.decoder, str)
        assert isinstance(self.strong_id, str)
        assert self.json_metadata is None or isinstance(self.json_metadata, (int, float, str, dict, list, tuple))

        if isinstance(self.errors, np.integer):
            object.__setattr__(self, 'errors', int(self.errors))
        if isinstance(self.shots, np.integer):
            object.__setattr__(self, 'shots', int(self.shots))
        if isinstance(self.discards, np.integer):
            object.__setattr__(self, 'discards', int(self.discards))
        if isinstance(self.seconds, (np.integer, np.floating)):
            object.__setattr__(self, 'seconds', float(self.seconds))

        assert self.errors >= 0
        assert self.discards >= 0
        assert self.seconds >= 0
        assert self.shots >= self.errors + self.discards
        assert all(isinstance(k, str) and isinstance(v, (int, np.integer)) for k, v in self.custom_counts.items())

    def with_edits(
        self,
        *,
        strong_id: Optional[str] = None,
        decoder: Optional[str] = None,
        json_metadata: Optional[Any] = None,
        shots: Optional[int] = None,
        errors: Optional[int] = None,
        discards: Optional[int] = None,
        seconds: Optional[float] = None,
        custom_counts: Optional[Counter[str]] = None,
    ) -> 'TaskStats':
        return TaskStats(
            strong_id=self.strong_id if strong_id is None else strong_id,
            decoder=self.decoder if decoder is None else decoder,
            json_metadata=self.json_metadata if json_metadata is None else json_metadata,
            shots=self.shots if shots is None else shots,
            errors=self.errors if errors is None else errors,
            discards=self.discards if discards is None else discards,
            seconds=self.seconds if seconds is None else seconds,
            custom_counts=self.custom_counts if custom_counts is None else custom_counts,
        )

    @overload
    def __add__(self, other: AnonTaskStats) -> AnonTaskStats:
        pass
    @overload
    def __add__(self, other: 'TaskStats') -> 'TaskStats':
        pass
    def __add__(self, other: Union[AnonTaskStats, 'TaskStats']) -> Union[AnonTaskStats, 'TaskStats']:
        if isinstance(other, AnonTaskStats):
            return self.to_anon_stats() + other

        if isinstance(other, TaskStats):
            if self.strong_id != other.strong_id:
                raise ValueError(f'{self.strong_id=} != {other.strong_id=}')
            if not _is_equal_json_values(self.json_metadata, other.json_metadata) or self.decoder != other.decoder:
                raise ValueError(
                    "A stat had the same strong id as another, but their other identifying information (json_metadata, decoder) differed.\n"
                    "The strong id is supposed to be a cryptographic hash that uniquely identifies what was sampled, so this is an error.\n"
                    "\n"
                    "This failure can occur when post-processing data (e.g. combining X basis stats and Z basis stats into synthetic both-basis stats).\n"
                    "To fix it, ensure any post-processing sets the strong id of the synthetic data in some cryptographically secure way.\n"
                    "\n"
                    "In some cases this can be caused by attempting to add a value that has gone through JSON serialization+parsing to one\n"
                    "that hasn't, which causes things like tuples transforming into lists.\n"
                    "\n"
                    f"The two stats:\n1. {self!r}\n2. {other!r}")

            total = self.to_anon_stats() + other.to_anon_stats()
            return TaskStats(
                decoder=self.decoder,
                strong_id=self.strong_id,
                json_metadata=self.json_metadata,
                shots=total.shots,
                errors=total.errors,
                discards=total.discards,
                seconds=total.seconds,
                custom_counts=total.custom_counts,
            )

        return NotImplemented
    __radd__ = __add__

    def to_anon_stats(self) -> AnonTaskStats:
        """Returns a `sinter.AnonTaskStats` with the same statistics.

        Examples:
            >>> import sinter
            >>> stat = sinter.TaskStats(
            ...     strong_id='test',
            ...     json_metadata={'a': [1, 2, 3]},
            ...     decoder='pymatching',
            ...     shots=22,
            ...     errors=3,
            ...     discards=4,
            ...     seconds=5,
            ... )
            >>> stat.to_anon_stats()
            sinter.AnonTaskStats(shots=22, errors=3, discards=4, seconds=5)
        """
        return AnonTaskStats(
            shots=self.shots,
            errors=self.errors,
            discards=self.discards,
            seconds=self.seconds,
            custom_counts=self.custom_counts.copy(),
        )

    def to_csv_line(self) -> str:
        """Converts into a line that can be printed into a CSV file.

        Examples:
            >>> import sinter
            >>> stat = sinter.TaskStats(
            ...     strong_id='test',
            ...     json_metadata={'a': [1, 2, 3]},
            ...     decoder='pymatching',
            ...     shots=22,
            ...     errors=3,
            ...     seconds=5,
            ... )
            >>> print(sinter.CSV_HEADER)
                 shots,    errors,  discards, seconds,decoder,strong_id,json_metadata,custom_counts
            >>> print(stat.to_csv_line())
                    22,         3,         0,    5.00,pymatching,test,"{""a"":[1,2,3]}",
        """
        return csv_line(
            shots=int(self.shots),
            errors=int(self.errors),
            seconds=float(self.seconds),
            discards=int(self.discards),
            strong_id=self.strong_id,
            decoder=self.decoder,
            json_metadata=self.json_metadata,
            custom_counts=self.custom_counts,
        )

    def _split_custom_counts(self, custom_keys: List[str]) -> List['TaskStats']:
        result = []
        for k in custom_keys:
            m = self.json_metadata
            if isinstance(m, dict):
                m = dict(m)
                m.setdefault('custom_error_count_key', k)
                m.setdefault('original_error_count', self.errors)
            result.append(TaskStats(
                strong_id=f'{self.strong_id}:{k}',
                decoder=self.decoder,
                json_metadata=m,
                shots=self.shots,
                errors=self.custom_counts[k],
                discards=self.discards,
                seconds=self.seconds,
                custom_counts=self.custom_counts,
            ))
        return result

    def __str__(self) -> str:
        return self.to_csv_line()

    def __repr__(self) -> str:
        terms = []
        terms.append(f'strong_id={self.strong_id!r}')
        terms.append(f'decoder={self.decoder!r}')
        terms.append(f'json_metadata={self.json_metadata!r}')
        if self.shots:
            terms.append(f'shots={self.shots!r}')
        if self.errors:
            terms.append(f'errors={self.errors!r}')
        if self.discards:
            terms.append(f'discards={self.discards!r}')
        if self.seconds:
            terms.append(f'seconds={self.seconds!r}')
        if self.custom_counts:
            terms.append(f'custom_counts={self.custom_counts!r}')
        return f'sinter.TaskStats({", ".join(terms)})'