/*
 *      Copyright (C) 2015  higherfrequencytrading.com
 *
 *      This program is free software: you can redistribute it and/or modify
 *      it under the terms of the GNU Lesser General Public License as published by
 *      the Free Software Foundation, either version 3 of the License.
 *
 *      This program is distributed in the hope that it will be useful,
 *      but WITHOUT ANY WARRANTY; without even the implied warranty of
 *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *      GNU Lesser General Public License for more details.
 *
 *      You should have received a copy of the GNU Lesser General Public License
 *      along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package net.openhft.chronicle.map;

import net.openhft.chronicle.algo.bitset.*;
import net.openhft.chronicle.bytes.Bytes;
import net.openhft.chronicle.hash.Data;
import net.openhft.chronicle.hash.VanillaGlobalMutableState;
import net.openhft.chronicle.hash.impl.TierCountersArea;
import net.openhft.chronicle.hash.impl.stage.hash.ChainingInterface;
import net.openhft.chronicle.hash.replication.ReplicableEntry;
import net.openhft.chronicle.map.impl.CompiledReplicatedMapIterationContext;
import net.openhft.chronicle.map.impl.CompiledReplicatedMapQueryContext;
import net.openhft.chronicle.map.replication.MapRemoteOperations;
import net.openhft.chronicle.values.Values;
import net.openhft.chronicle.wire.WireIn;
import net.openhft.chronicle.wire.WireOut;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.Closeable;
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Set;
import java.util.concurrent.CopyOnWriteArraySet;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReferenceArray;
import java.util.stream.Stream;

import static net.openhft.chronicle.algo.MemoryUnit.*;
import static net.openhft.chronicle.algo.bitset.BitSetFrame.NOT_FOUND;
import static net.openhft.chronicle.algo.bytes.Access.nativeAccess;
import static net.openhft.chronicle.hash.replication.TimeProvider.currentTime;

/**
 * <h2>A Replicating Multi-Master HashMap</h2> <p>Each remote hash map mirrors its changes over to
 * the other remote hash maps. No hash map is considered the master store of data; instead, each
 * hash map uses timestamps to reconcile changes. We refer to an instance of a remote hash map as
 * a node. A node may be connected to any number of other nodes; in the first implementation the
 * maximum number of nodes is fixed. The data stored locally in each node becomes eventually
 * consistent, so changes made to one node, for example by calling put(), are replicated over to
 * the other nodes. To achieve a high level of performance and throughput, the call to put() does
 * not block. With ConcurrentHashMap it is typical to check the return value of some methods, for
 * example remove(), to obtain the old value. Due to the loose coupling and lock-free nature of
 * this multi-master implementation, such a return value is only the old value in the node's local
 * data store; in other words, the nodes are only concurrent locally. It is worth realising that
 * another node performing exactly the same operation may return a different value. However,
 * reconciliation ensures that the maps themselves become eventually consistent. </p>
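 * <p>For illustration only, a minimal sketch of configuring a node with its own replication
 * identifier via {@code ChronicleMapBuilder} (the transport between nodes is configured
 * separately and is not shown; method names follow those referenced elsewhere in this class,
 * and the exact builder options depend on the Chronicle Map version in use):</p>
 * <pre>{@code
 * // node A, identifier 1; a second node would be configured with a different identifier
 * ChronicleMap<Integer, String> nodeA = ChronicleMapBuilder
 *         .of(Integer.class, String.class)
 *         .averageValue("an example value")
 *         .entries(1_000)
 *         .replication((byte) 1)
 *         .create();
 * }</pre>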
 * <h2>Reconciliation</h2> <p>Suppose two (or more) nodes receive a change to their maps for the
 * same key but with different values, say because a user of the maps called put(key, value) on
 * each of them. Initially each node updates its local store, so the local stores hold different
 * values, but the aim of multi-master replication is to provide eventual consistency across the
 * nodes. With multi-master replication, whenever a node is changed it notifies the other nodes of
 * its change. We refer to this notification as an event. The event holds a timestamp indicating
 * the time the change occurred, and it also holds the state transition, in this case a put with a
 * key and value. Eventual consistency is achieved by looking at the timestamp from the remote
 * node: if, for a given key, the remote node's timestamp is newer than the local node's
 * timestamp, the event from the remote node is applied to the local node; otherwise the event is
 * ignored. </p> <p>However, there is an edge case that we have to concern ourselves with: if two
 * nodes update their maps at the same time with different values, we have to deterministically
 * resolve which update wins, because eventual consistency requires both nodes to end up locally
 * holding the same data. Although it is rare for two remote nodes to receive an update to their
 * maps at exactly the same time for the same key, we have to handle this edge case, and it is
 * therefore important not to rely on timestamps alone to reconcile the updates. Typically the
 * update with the newest timestamp wins, but in this example both timestamps are the same, and
 * the decision made on one node must be identical to the decision made on the other. We resolve
 * this dilemma by using a node identifier: each node has a unique identifier, and the update from
 * the node with the smallest identifier wins. </p>
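 * <p>The tie-break rule can be illustrated with a small sketch (illustrative only; replicated
 * events are actually handled by the configured {@code MapRemoteOperations}):</p>
 * <pre>{@code
 * // decide whether a remote event should overwrite the local state for the same key
 * boolean remoteWins(long remoteTimestamp, byte remoteId, long localTimestamp, byte localId) {
 *     if (remoteTimestamp != localTimestamp)
 *         return remoteTimestamp > localTimestamp; // the newest timestamp wins
 *     return remoteId < localId;                   // on a tie, the smallest identifier wins
 * }
 * }</pre>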
 *
 * @param <K> the entries key type
 * @param <V> the entries value type
 */
public class ReplicatedChronicleMap<K, V, R> extends VanillaChronicleMap<K, V, R>
        implements Replica, Replica.EntryExternalizable {

    private static final Logger LOG = LoggerFactory.getLogger(ReplicatedChronicleMap.class);

    public static final int ADDITIONAL_ENTRY_BYTES = 10;

    static final byte ENTRY_HUNK = 1;
    static final byte BOOTSTRAP_TIME_HUNK = 2;

    private long tierModIterBitSetSizeInBits;
    private long tierModIterBitSetOuterSize;
    private long segmentModIterBitSetsForIdentifierOuterSize;
    private long tierBulkModIterBitSetsForIdentifierOuterSize;

    /**
     * Default value is 0, which corresponds to the "unset" identifier value (valid ids are
     * positive)
     */
    private transient byte localIdentifier;
    transient Set<Closeable> closeables;

    /**
     * Idiomatically {@code assignedModificationIterators} would be a {@link CopyOnWriteArraySet},
     * but we need to iterate over this collection frequently without creating any garbage, which
     * is impossible with {@code CopyOnWriteArraySet}, hence a plain array is used instead.
     */
    private transient ModificationIterator[] assignedModificationIterators;
    private transient AtomicReferenceArray<ModificationIterator> modificationIterators;
    private transient long startOfModificationIterators;

    public transient boolean cleanupRemovedEntries;
    public transient long cleanupTimeout;
    public transient TimeUnit cleanupTimeoutUnit;
    
    public transient MapRemoteOperations<K, V, R> remoteOperations;

    transient BitSetFrame tierModIterFrame;

    private transient long[] remoteNodeCouldBootstrapFrom;

    public ReplicatedChronicleMap(@NotNull ChronicleMapBuilder<K, V> builder) throws IOException {
        super(builder);
        tierModIterBitSetSizeInBits = computeTierModIterBitSetSizeInBits();
        tierModIterBitSetOuterSize = computeTierModIterBitSetOuterSize();
        segmentModIterBitSetsForIdentifierOuterSize =
                computeSegmentModIterBitSetsForIdentifierOuterSize();
        tierBulkModIterBitSetsForIdentifierOuterSize =
                computeTierBulkModIterBitSetsForIdentifierOuterSize(tiersInBulk);
    }

    @Override
    protected void readMarshallableFields(@NotNull WireIn wireIn) {
        super.readMarshallableFields(wireIn);

        tierModIterBitSetSizeInBits = wireIn.read(() -> "tierModIterBitSetSizeInBits").int64();
        tierModIterBitSetOuterSize = wireIn.read(() -> "tierModIterBitSetOuterSize").int64();
        segmentModIterBitSetsForIdentifierOuterSize =
                wireIn.read(() -> "segmentModIterBitSetsForIdentifierOuterSize").int64();
        tierBulkModIterBitSetsForIdentifierOuterSize =
                wireIn.read(() -> "tierBulkModIterBitSetsForIdentifierOuterSize").int64();
    }

    @Override
    public void writeMarshallable(@NotNull WireOut wireOut) {
        super.writeMarshallable(wireOut);

        wireOut.write(() -> "tierModIterBitSetSizeInBits").int64(tierModIterBitSetSizeInBits);
        wireOut.write(() -> "tierModIterBitSetOuterSize").int64(tierModIterBitSetOuterSize);
        wireOut.write(() -> "segmentModIterBitSetsForIdentifierOuterSize")
                .int64(segmentModIterBitSetsForIdentifierOuterSize);
        wireOut.write(() -> "tierBulkModIterBitSetsForIdentifierOuterSize")
                .int64(tierBulkModIterBitSetsForIdentifierOuterSize);
    }

    @Override
    protected VanillaGlobalMutableState createGlobalMutableState() {
        return Values.newNativeReference(ReplicatedGlobalMutableState.class);
    }

    @Override
    public ReplicatedGlobalMutableState globalMutableState() {
        return (ReplicatedGlobalMutableState) super.globalMutableState();
    }

    @Override
    public void initTransients() {
        super.initTransients();
        initOwnTransients();
    }

    private void initOwnTransients() {
        //noinspection unchecked
        assignedModificationIterators = new ReplicatedChronicleMap.ModificationIterator[0];
        modificationIterators = new AtomicReferenceArray<>(128);
        closeables = new CopyOnWriteArraySet<>();
        tierModIterFrame = new SingleThreadedFlatBitSetFrame(computeTierModIterBitSetSizeInBits());
        remoteNodeCouldBootstrapFrom = new long[128];
    }

    @Override
    void initTransientsFromBuilder(ChronicleMapBuilder<K, V> builder) {
        super.initTransientsFromBuilder(builder);
        this.localIdentifier = builder.replicationIdentifier;
        if (localIdentifier == -1)
            throw new IllegalStateException("localIdentifier should not be -1");
        //noinspection unchecked
        this.remoteOperations = (MapRemoteOperations<K, V, R>) builder.remoteOperations;
        cleanupRemovedEntries = builder.cleanupRemovedEntries;
        cleanupTimeout = builder.cleanupTimeout;
        cleanupTimeoutUnit = builder.cleanupTimeoutUnit;
    }

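    // One bit per chunk in a segment tier; the bit count is rounded up so that the bit set
    // occupies a whole number of 64-bit longs.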
    private long computeTierModIterBitSetSizeInBits() {
        return LONGS.align(actualChunksPerSegmentTier, BITS);
    }

    private long computeTierModIterBitSetOuterSize() {
        long tierModIterBitSetOuterSize = BYTES.convert(computeTierModIterBitSetSizeInBits(), BITS);
        // protect from false sharing between bit sets of adjacent segments
        tierModIterBitSetOuterSize += BYTES.convert(2, CACHE_LINES);
        if (CACHE_LINES.align(tierModIterBitSetOuterSize, BYTES) == tierModIterBitSetOuterSize) {
            tierModIterBitSetOuterSize =
                    breakL1CacheAssociativityContention(tierModIterBitSetOuterSize);
        }
        return tierModIterBitSetOuterSize;
    }

    private long computeSegmentModIterBitSetsForIdentifierOuterSize() {
        return computeTierModIterBitSetOuterSize() * actualSegments;
    }

    private long computeTierBulkModIterBitSetsForIdentifierOuterSize(long tiersInBulk) {
        return computeTierModIterBitSetOuterSize() * tiersInBulk;
    }

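    // Both the tier bulk layout (below) and the map header (see mapHeaderInnerSize()) reserve one
    // block of modification-iterator bit sets per possible remote identifier. Identifiers are
    // single bytes and valid ids are positive, hence the fixed factor of 128.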
    @Override
    protected long computeTierBulkInnerOffsetToTiers(long tiersInBulk) {
        long tierBulkBitSetsInnerSize =
                computeTierBulkModIterBitSetsForIdentifierOuterSize(tiersInBulk) * 128;
        return super.computeTierBulkInnerOffsetToTiers(tiersInBulk) +
                CACHE_LINES.align(tierBulkBitSetsInnerSize, BYTES);
    }

    @Override
    public long mapHeaderInnerSize() {
        return super.mapHeaderInnerSize() + (segmentModIterBitSetsForIdentifierOuterSize * 128);
    }

    @Override
    public void setRemoteNodeCouldBootstrapFrom(byte remoteIdentifier, long bootstrapTimestamp) {
        remoteNodeCouldBootstrapFrom[remoteIdentifier] = bootstrapTimestamp;
    }

    @Override
    public long remoteNodeCouldBootstrapFrom(byte remoteIdentifier) {
        return remoteNodeCouldBootstrapFrom[remoteIdentifier];
    }

    @Override
    public void onHeaderCreated() {
        // Pad modification iterators at 3 cache lines from the end of the map header,
        // to avoid false sharing with the header of the first segment
        startOfModificationIterators = super.mapHeaderInnerSize() +
                RESERVED_GLOBAL_MUTABLE_STATE_BYTES - BYTES.convert(3, CACHE_LINES);
    }

    @Override
    protected void zeroOutNewlyMappedChronicleMapBytes() {
        super.zeroOutNewlyMappedChronicleMapBytes();
        bs.zeroOut(super.mapHeaderInnerSize(), this.mapHeaderInnerSize());
    }

    void addCloseable(Closeable closeable) {
        closeables.add(closeable);
    }

    @Override
    public synchronized void close() {
        if (closed)
            return;
        for (Closeable closeable : closeables) {
            try {
                closeable.close();
            } catch (IOException e) {
                LOG.error("", e);
            }
        }
        super.close();
    }

    @Override
    public byte identifier() {
        byte id = localIdentifier;
        if (id == 0) {
            throw new IllegalStateException("Replication identifier is not set for this\n" +
                    "replicated Chronicle Map. This should only be possible if a persisted\n" +
                    "replicated Chronicle Map is accessed from another process/JVM run, or after\n" +
                    "a transfer from another machine, and the replication identifier is not\n" +
                    "specified when access is configured, e.g. ChronicleMap.of(...)" +
                    ".createPersistedTo(existingFile).\n" +
                    "In this case, the replicated Chronicle Map \"doesn't know\" its identifier,\n" +
                    "and is only able to perform simple _read_ operations like map.get(), which\n" +
                    "don't access the identifier. To perform updates, insertions and replication\n" +
                    "tasks, you should configure the current node identifier\n" +
                    "via the `replication(identifier)` method call in the ChronicleMapBuilder\n" +
                    "configuration chain.");
        }
        assert id > 0;
        return id;
    }

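    // Lazily creates the ModificationIterator for the given remote identifier, using a
    // check -> lock -> re-check pattern around the global mutable state lock, so that concurrent
    // callers end up sharing a single iterator per identifier. The "init" flag recorded in the
    // shared global mutable state lets bit sets initialized in previous runs be re-attached
    // rather than zeroed out (see the ModificationIterator constructor).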
    @Override
    public ModificationIterator acquireModificationIterator(byte remoteIdentifier) {
        ModificationIterator modificationIterator = modificationIterators.get(remoteIdentifier);
        if (modificationIterator != null)
            return modificationIterator;

        globalMutableStateLock();
        try {
            modificationIterator = modificationIterators.get(remoteIdentifier);

            if (modificationIterator != null)
                return modificationIterator;

            ReplicatedGlobalMutableState globalMutableState = globalMutableState();
            boolean modificationIteratorInit =
                    globalMutableState.getModificationIteratorInitAt(remoteIdentifier);
            final ModificationIterator modIter =
                    new ModificationIterator(remoteIdentifier, modificationIteratorInit);
            if (!modificationIteratorInit) {
                globalMutableState.setModificationIteratorInitAt(remoteIdentifier, true);
                // This doesn't need to be a volatile update, because the modification iterator
                // count is read (also non-volatile) at the beginning of the raiseChange()/
                // dropChange() methods. The risk would be that a new modification iterator is
                // added after its dirtyEntries() has already completed, and a subsequent
                // raiseChange() call misses the new iterator. However, raiseChange() is called
                // while the segment lock is held on the update level, as is dirtyEntries() (it is
                // a segment iteration, which is always performed under the update-level lock), so
                // the two actions (dirtyEntries() and raiseChange()) are serialized with each
                // other, hence the change to the modification iterator count is visible.
                globalMutableState.addModificationIteratorsCount(1);
            }
            //noinspection unchecked
            assignedModificationIterators = Stream
                    .concat(Arrays.stream(assignedModificationIterators), Stream.of(modIter))
                    .sorted(Comparator.comparing(it -> it.remoteIdentifier))
                    .toArray(ReplicatedChronicleMap.ModificationIterator[]::new);
            modificationIterators.set(remoteIdentifier, modIter);
            return modIter;
        } finally {
            globalMutableStateUnlock();
        }
    }

    public ModificationIterator[] acquireAllModificationIterators() {
        for (int remoteIdentifier = 0; remoteIdentifier < 128; remoteIdentifier++) {
            if (globalMutableState().getModificationIteratorInitAt(remoteIdentifier)) {
                acquireModificationIterator((byte) remoteIdentifier);
            }
        }
        return assignedModificationIterators;
    }

    private void updateModificationIteratorsArray() {
        if (globalMutableState().getModificationIteratorsCount() !=
                assignedModificationIterators.length) {
            acquireAllModificationIterators();
        }
    }

    public void raiseChange(long tierIndex, long pos) {
        // -1 is invalid remoteIdentifier => raise change for all
        raiseChangeForAllExcept(tierIndex, pos, (byte) -1);
    }

    public void raiseChangeFor(long tierIndex, long pos, byte remoteIdentifier) {
        acquireModificationIterator(remoteIdentifier).raiseChange0(tierIndex, pos);
    }

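    // Tier indexes are 1-based: tiers 1..actualSegments are the "first" tiers (one per segment);
    // higher tier indexes address extra tiers, allocated in bulks of tiersInBulk (a power of 2).
    // This and the following methods map a tier index to the corresponding bit set. As a purely
    // hypothetical example, with actualSegments = 16 and tiersInBulk = 4, tierIndex 19 maps to
    // extraTierIndex = 2, bulkIndex = 0 and the third bit set within that bulk.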
    public void raiseChangeForAllExcept(long tierIndex, long pos, byte remoteIdentifier) {
        updateModificationIteratorsArray();
        if (tierIndex <= actualSegments) {
            long segmentIndex = tierIndex - 1;
            long offsetToTierBitSet = segmentIndex * tierModIterBitSetOuterSize;
            for (ModificationIterator it : assignedModificationIterators) {
                if (it.remoteIdentifier != remoteIdentifier)
                    it.raiseChangeInSegment(offsetToTierBitSet, pos);
            }
        } else {
            long extraTierIndex = tierIndex - 1 - actualSegments;
            int bulkIndex = (int) (extraTierIndex >> log2TiersInBulk);
            long offsetToTierBitSet =
                    (extraTierIndex & (tiersInBulk - 1)) * tierModIterBitSetOuterSize;
            for (ModificationIterator it : assignedModificationIterators) {
                if (it.remoteIdentifier != remoteIdentifier)
                    it.raiseChangeInTierBulk(bulkIndex, offsetToTierBitSet, pos);
            }
        }
    }

    public void dropChange(long tierIndex, long pos) {
        updateModificationIteratorsArray();
        if (tierIndex <= actualSegments) {
            long segmentIndex = tierIndex - 1;
            long offsetToTierBitSet = segmentIndex * tierModIterBitSetOuterSize;
            for (ModificationIterator modificationIterator : assignedModificationIterators) {
                modificationIterator.dropChangeInSegment(offsetToTierBitSet, pos);
            }
        } else {
            long extraTierIndex = tierIndex - 1 - actualSegments;
            int bulkIndex = (int) (extraTierIndex >> log2TiersInBulk);
            long offsetToTierBitSet =
                    (extraTierIndex & (tiersInBulk - 1)) * tierModIterBitSetOuterSize;
            for (ModificationIterator modificationIterator : assignedModificationIterators) {
                modificationIterator.dropChangeInTierBulk(bulkIndex, offsetToTierBitSet, pos);
            }
        }
    }

    public void dropChangeFor(long tierIndex, long pos, byte remoteIdentifier) {
        acquireModificationIterator(remoteIdentifier).dropChange0(tierIndex, pos);
    }

    public void moveChange(long oldTierIndex, long oldPos, long newTierIndex, long newPos) {
        updateModificationIteratorsArray();
        if (oldTierIndex <= actualSegments) {
            long oldSegmentIndex = oldTierIndex - 1;
            long oldOffsetToTierBitSet = oldSegmentIndex * tierModIterBitSetOuterSize;
            if (newTierIndex <= actualSegments) {
                long newSegmentIndex = newTierIndex - 1;
                long newOffsetToTierBitSet = newSegmentIndex * tierModIterBitSetOuterSize;
                for (ModificationIterator modificationIterator : assignedModificationIterators) {
                    if (modificationIterator.dropChangeInSegment(oldOffsetToTierBitSet, oldPos))
                        modificationIterator.raiseChangeInSegment(newOffsetToTierBitSet, newPos);
                }
            } else {
                long newExtraTierIndex = newTierIndex - 1 - actualSegments;
                int newBulkIndex = (int) (newExtraTierIndex >> log2TiersInBulk);
                long newOffsetToTierBitSet =
                        (newExtraTierIndex & (tiersInBulk - 1)) * tierModIterBitSetOuterSize;
                for (ModificationIterator modificationIterator : assignedModificationIterators) {
                    if (modificationIterator.dropChangeInSegment(oldOffsetToTierBitSet, oldPos)) {
                        modificationIterator
                                .raiseChangeInTierBulk(newBulkIndex, newOffsetToTierBitSet, newPos);
                    }
                }
            }
        } else {
            long oldExtraTierIndex = oldTierIndex - 1 - actualSegments;
            int oldBulkIndex = (int) (oldExtraTierIndex >> log2TiersInBulk);
            long oldOffsetToTierBitSet =
                    (oldExtraTierIndex & (tiersInBulk - 1)) * tierModIterBitSetOuterSize;
            if (newTierIndex <= actualSegments) {
                long newSegmentIndex = newTierIndex - 1;
                long newOffsetToTierBitSet = newSegmentIndex * tierModIterBitSetOuterSize;
                for (ModificationIterator modificationIterator : assignedModificationIterators) {
                    if (modificationIterator
                            .dropChangeInTierBulk(oldBulkIndex, oldOffsetToTierBitSet, oldPos)) {
                        modificationIterator.raiseChangeInSegment(newOffsetToTierBitSet, newPos);
                    }
                }
            } else {
                long newExtraTierIndex = newTierIndex - 1 - actualSegments;
                int newBulkIndex = (int) (newExtraTierIndex >> log2TiersInBulk);
                long newOffsetToTierBitSet =
                        (newExtraTierIndex & (tiersInBulk - 1)) * tierModIterBitSetOuterSize;
                for (ModificationIterator modificationIterator : assignedModificationIterators) {
                    if (modificationIterator
                            .dropChangeInTierBulk(oldBulkIndex, oldOffsetToTierBitSet, oldPos)) {
                        modificationIterator
                                .raiseChangeInTierBulk(newBulkIndex, newOffsetToTierBitSet, newPos);
                    }
                }
            }
        }
    }

    public boolean isChanged(long tierIndex, long pos) {
        updateModificationIteratorsArray();
        if (tierIndex <= actualSegments) {
            long segmentIndex = tierIndex - 1;
            long offsetToTierBitSet = segmentIndex * tierModIterBitSetOuterSize;
            for (ModificationIterator modificationIterator : assignedModificationIterators) {
                if (modificationIterator.isChangedSegment(offsetToTierBitSet, pos))
                    return true;
            }
        } else {
            long extraTierIndex = tierIndex - 1 - actualSegments;
            int bulkIndex = (int) (extraTierIndex >> log2TiersInBulk);
            long offsetToTierBitSet =
                    (extraTierIndex & (tiersInBulk - 1)) * tierModIterBitSetOuterSize;
            for (ModificationIterator modificationIterator : assignedModificationIterators) {
                if (modificationIterator.isChangedTierBulk(bulkIndex, offsetToTierBitSet, pos))
                    return true;
            }
        }
        return false;
    }

    @Override
    public boolean identifierCheck(@NotNull ReplicableEntry entry, int chronicleId) {
        return entry.originIdentifier() == identifier();
    }

    @Override
    public void writeExternalEntry(
            ReplicableEntry entry, Bytes payload, @NotNull Bytes destination, int chronicleId) {
        if (payload != null)
            writePayload(payload, destination);
        if (entry != null)
            writeExternalEntry0(entry, destination);
    }

    private void writePayload(Bytes payload, Bytes destination) {
        destination.writeByte(BOOTSTRAP_TIME_HUNK);
        destination.write(payload, payload.readPosition(), payload.readRemaining());
    }

    /**
     * This method does not acquire a segment lock; a segment lock should be obtained before
     * calling this method, especially when it is used in a multi-threaded context.
     */
    private void writeExternalEntry0(ReplicableEntry entry, Bytes destination) {
        destination.writeByte(ENTRY_HUNK);

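        // Layout of an entry hunk, written below after the ENTRY_HUNK marker byte:
        //   stop-bit encoded origin timestamp
        //   origin identifier byte (must be non-zero)
        //   "is deleted" flag
        //   key size (via keySizeMarshaller) followed by the key bytes
        //   value size (via valueSizeMarshaller) followed by the value bytes, only if not deleted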
        destination.writeStopBit(entry.originTimestamp());
        if (entry.originIdentifier() == 0)
            throw new IllegalStateException("Identifier can't be 0");
        destination.writeByte(entry.originIdentifier());

        Data key;
        boolean isDeleted;
        if (entry instanceof MapEntry) {
            isDeleted = false;
            key = ((MapEntry) entry).key();
        } else {
            isDeleted = true;
            key = ((MapAbsentEntry) entry).absentKey();
        }

        destination.writeBoolean(isDeleted);

        keySizeMarshaller.writeSize(destination, key.size());
        key.writeTo(destination, destination.writePosition());
        destination.writeSkip(key.size());

        boolean debugEnabled = LOG.isDebugEnabled();
        String message = null;
        if (debugEnabled) {
            if (isDeleted) {
                LOG.debug("WRITING ENTRY TO DEST -  into local-id={}, remove(key={})",
                        identifier(), key);
            } else {
                message = String.format(
                        "WRITING ENTRY TO DEST  -  into local-id=%d, put(key=%s,",
                        identifier(), key);
            }
        }

        if (isDeleted)
            return;

        Data value = ((MapEntry) entry).value();
        valueSizeMarshaller.writeSize(destination, value.size());
        value.writeTo(destination, destination.writePosition());
        destination.writeSkip(value.size());

        if (debugEnabled) {
            LOG.debug(message + "value=" + value + ")");
        }
    }
    
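    // q() and i() lazily install the root query/iteration context in the superclass's 'cxt'
    // holder; mapContext() and iterationContext() then obtain a usable (possibly nested) context
    // from that root via ChainingInterface.getContext().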
    private ChainingInterface q() {
        ChainingInterface queryContext;
        queryContext = cxt.get();
        if (queryContext == null) {
            queryContext = new CompiledReplicatedMapQueryContext<>(ReplicatedChronicleMap.this);
            cxt.set(queryContext);
        }
        return queryContext;
    }

    @Override
    public CompiledReplicatedMapQueryContext<K, V, R> mapContext() {
        //noinspection unchecked
        return q().getContext(CompiledReplicatedMapQueryContext.class,
                ci -> new CompiledReplicatedMapQueryContext<>(ci, this));
    }

    /**
     * This method does not acquire a segment lock; a segment lock should be obtained before
     * calling this method, especially when it is used in a multi-threaded context.
     */
    @Override
    public void readExternalEntry(@NotNull Bytes source, byte remoteNodeIdentifier) {
        byte hunk = source.readByte();
        if (hunk == BOOTSTRAP_TIME_HUNK) {
            setRemoteNodeCouldBootstrapFrom(remoteNodeIdentifier, source.readLong());
        } else {
            assert hunk == ENTRY_HUNK;
            try (CompiledReplicatedMapQueryContext<K, V, R> remoteOpContext = mapContext()) {
                remoteOpContext.processReplicatedEvent(remoteNodeIdentifier, source);
            }
        }
    }

    private ChainingInterface i() {
        ChainingInterface iterContext;
        iterContext = cxt.get();
        if (iterContext == null) {
            iterContext = new CompiledReplicatedMapIterationContext<>(ReplicatedChronicleMap.this);
            cxt.set(iterContext);
        }
        return iterContext;
    }

    public CompiledReplicatedMapIterationContext<K, V, R> iterationContext() {
        //noinspection unchecked
        return i().getContext(CompiledReplicatedMapIterationContext.class,
                ci -> new CompiledReplicatedMapIterationContext<>(ci, this));
    }

    /**
     * <p>Once a change occurs to the map, replication requires that the change is picked up by
     * another thread; this class provides an iterator-like interface to poll for such changes.
     * </p> <p>In most cases the thread that adds data to the node is not the same thread that
     * replicates the data over to the other nodes, so the changes have to be marshalled between
     * the thread storing data in the map and the thread running the replication. </p> <p>One way
     * to perform this marshalling would be to pipe the data into a queue. However, this class
     * takes another approach: it uses a bit set and raises the bits which correspond to the
     * positions of the entries that have changed, then provides an iterator-like interface to
     * poll for those changes. </p>
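     * <p>A minimal sketch (illustrative only) of the polling loop a replication thread might run.
     * A real replication transport would also call {@code dirtyEntries(fromTimestamp)} when
     * (re)connecting, and would typically wait on the {@code ModificationNotifier} rather than
     * spin:</p>
     * <pre>{@code
     * Replica.ModificationIterator it = map.acquireModificationIterator(remoteIdentifier);
     * while (running) {
     *     if (it.hasNext()) {
     *         // the callback serializes the dirty entry and sends it to the remote node
     *         it.nextEntry(callback, 0);
     *     } else {
     *         pauseBriefly(); // hypothetical helper: park briefly, or wait for onChange()
     *     }
     * }
     * }</pre>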
     *
     * @author Rob Austin.
     */
    public class ModificationIterator implements Replica.ModificationIterator {
        private final byte remoteIdentifier;
        private final long segmentBitSetsAddr;
        private final long offsetToBitSetsWithinATierBulk;

        private ModificationNotifier modificationNotifier;

        private long bootstrapTimeAfterNextIterationComplete = 0L;
        private boolean somethingSentOnThisIteration = false;

        // The iteration "cursor" consists of 4 fields (the three ints below plus entryPos):
        // 1) segmentIndex >= 0, bulkIndex = -1, tierIndexOffsetWithinBulk = -1:
        //    => we are in the "first tiers" aka "segments"
        // 2) segmentIndex = -1, bulkIndex >= 0, tierIndexOffsetWithinBulk >= 0:
        //    => we are in extra tiers
        private int segmentIndex;
        private int bulkIndex;
        private int tierIndexOffsetWithinBulk;
        /**
         * Corresponds to {@link net.openhft.chronicle.hash.impl.stage.entry.HashEntryStages#pos}
         */
        private long entryPos;

        /**
         * Cached addr of the changes bit set of the current tier (which "cursor" points to),
         * to avoid re-computation of this addr in {@link #entryIsStillDirty(long)} and
         * {@link #clearEntry(long)} methods
         */
        private long tierBitSetAddr;

        public ModificationIterator(byte remoteIdentifier, boolean sharedMemoryInit) {
            this.remoteIdentifier = remoteIdentifier;
            segmentBitSetsAddr = bsAddress() + startOfModificationIterators +
                    remoteIdentifier * segmentModIterBitSetsForIdentifierOuterSize;
            if (!sharedMemoryInit) {
                nativeAccess().zeroOut(null, segmentBitSetsAddr,
                        segmentModIterBitSetsForIdentifierOuterSize);
            }
            offsetToBitSetsWithinATierBulk =
                    remoteIdentifier * tierBulkModIterBitSetsForIdentifierOuterSize;

            resetCursor();
        }

        private void resetCursor() {
            segmentIndex = 0;
            bulkIndex = -1;
            tierIndexOffsetWithinBulk = -1;
            entryPos = -1;

            tierBitSetAddr = segmentBitSetsAddr; // + tierModIterBitSetOuterSize * segmentIndex = 0
        }

        public void setModificationNotifier(@NotNull ModificationNotifier modificationNotifier) {
            this.modificationNotifier = modificationNotifier;
        }

        void raiseChangeInSegment(long offsetToTierBitSet, long pos) {
            tierModIterFrame.set(nativeAccess(), null,
                    segmentBitSetsAddr + offsetToTierBitSet, pos);
            if (modificationNotifier != null)
                modificationNotifier.onChange();
        }

        void raiseChangeInTierBulk(int bulkIndex, long offsetToTierBitSet, long pos) {
            TierBulkData tierBulkData = tierBulkOffsets.get(bulkIndex);
            long bitSetAddr = bitSetsAddr(tierBulkData) + offsetToTierBitSet;
            tierModIterFrame.set(nativeAccess(), null, bitSetAddr, pos);
            if (modificationNotifier != null)
                modificationNotifier.onChange();
        }

        boolean dropChangeInSegment(long offsetToTierBitSet, long pos) {
            return tierModIterFrame.clearIfSet(nativeAccess(), null,
                    segmentBitSetsAddr + offsetToTierBitSet, pos);
        }

        boolean dropChangeInTierBulk(int bulkIndex, long offsetToTierBitSet, long pos) {
            TierBulkData tierBulkData = tierBulkOffsets.get(bulkIndex);
            long bitSetAddr = bitSetsAddr(tierBulkData) + offsetToTierBitSet;
            return tierModIterFrame.clearIfSet(nativeAccess(), null, bitSetAddr, pos);
        }

        boolean isChangedSegment(long offsetToTierBitSet, long pos) {
            return tierModIterFrame.isSet(nativeAccess(), null,
                    segmentBitSetsAddr + offsetToTierBitSet, pos);
        }

        boolean isChangedTierBulk(int bulkIndex, long offsetToTierBitSet, long pos) {
            TierBulkData tierBulkData = tierBulkOffsets.get(bulkIndex);
            long bitSetAddr = bitSetsAddr(tierBulkData) + offsetToTierBitSet;
            return tierModIterFrame.isSet(nativeAccess(), null, bitSetAddr, pos);
        }

        private long bitSetsAddr(TierBulkData tierBulkData) {
            return tierBulkData.bytesStore.address(tierBulkData.offset) +
                    offsetToBitSetsWithinATierBulk;
        }

        /**
         * You can continue to poll hasNext() until data becomes available. If we are in the middle
         * of processing an entry via {@code nextEntry}, hasNext() will return true until the bit is
         * cleared.
         *
         * @return true if there is an entry
         */
        @Override
        public boolean hasNext() {
            return nextEntryPos(null, 0) != NOT_FOUND;
        }

        private long nextEntryPos(Callback callback, int chronicleId) {
            long nextEntryPos;
            boolean allBitSetsScannedFromTheStart = false;
            // at most 2 iterations
            while (!allBitSetsScannedFromTheStart) {
                if (segmentIndex >= 0) {
                    allBitSetsScannedFromTheStart = segmentIndex == 0 && entryPos == -1;
                    if (allBitSetsScannedFromTheStart) {
                        bootstrapTimeAfterNextIterationComplete = currentTime();
                        somethingSentOnThisIteration = false;
                    }

                    while (segmentIndex < actualSegments) {
                        // This is needed to ensure that any entry update with a timestamp smaller
                        // than the one assigned to bootstrapTimeAfterNextIterationComplete is
                        // visible during the iteration of the current segment. Bits are raised
                        // during the update via a non-volatile bit set (for performance reasons),
                        // hence, to guarantee visibility during the iteration, we build a
                        // happens-before edge between the bit raise and the bit reading:
                        // bit raised ->
                        // lock released (end of update operation) ->
                        // lock acquired (the following acquireAndReleaseUpdateLock() call) ->
                        // bits are iterated (raised bits should be visible here)
                        if (entryPos == -1) {
                            acquireAndReleaseUpdateLock(segmentIndex);
                        }

                        if ((nextEntryPos = tierModIterFrame.nextSetBit(nativeAccess(),
                                null, tierBitSetAddr, entryPos + 1)) != NOT_FOUND) {
                            return nextEntryPos;
                        } else {
                            segmentIndex++;
                            tierBitSetAddr += tierModIterBitSetOuterSize;
                            entryPos = -1;
                        }
                    }
                    // go to extra bulks
                    segmentIndex = -1;
                    bulkIndex = 0;
                    tierIndexOffsetWithinBulk = 0;
                    if (bulkIndex < globalMutableState().getAllocatedExtraTierBulks())
                        tierBitSetAddr = bitSetsAddr(tierBulkOffsets.get(bulkIndex));
                }
                // for each allocated tier bulk
                while (bulkIndex < globalMutableState().getAllocatedExtraTierBulks()) {
                    while (tierIndexOffsetWithinBulk < tiersInBulk) {
                        if ((nextEntryPos = tierModIterFrame.nextSetBit(nativeAccess(), null,
                                tierBitSetAddr, entryPos + 1)) != NOT_FOUND) {
                            return nextEntryPos;
                        } else {
                            tierIndexOffsetWithinBulk++;
                            tierBitSetAddr += tierModIterBitSetOuterSize;
                            entryPos = -1;
                        }
                    }
                    // go to the next bulk
                    bulkIndex++;
                    tierIndexOffsetWithinBulk = 0;
                    if (bulkIndex < globalMutableState().getAllocatedExtraTierBulks())
                        tierBitSetAddr = bitSetsAddr(tierBulkOffsets.get(bulkIndex));
                }

                resetCursor();

                // we have walked through the whole Chronicle Map instance, i.e. completed one
                // "iteration"
                if (callback != null && somethingSentOnThisIteration) {
                    callback.onBootstrapTime(bootstrapTimeAfterNextIterationComplete, chronicleId);
                }
            }
            return NOT_FOUND;
        }

        private void acquireAndReleaseUpdateLock(int segmentIndex) {
            try (CompiledReplicatedMapIterationContext<K, V, R> c = iterationContext()) {
                c.initSegmentIndex(segmentIndex);
                c.updateLock().lock();
            }
        }

        /**
         * @param callback    called with each dirty entry; this class takes care of the locking
         * @param chronicleId passed through to the callback
         * @return true if an entry was processed
         */
        @Override
        public boolean nextEntry(@NotNull Callback callback, int chronicleId) {
            while (true) {
                long nextEntryPos = nextEntryPos(callback, chronicleId);
                if (nextEntryPos == NOT_FOUND)
                    return false;
                entryPos = nextEntryPos;

                try (CompiledReplicatedMapIterationContext<K, V, R> context = iterationContext()) {
                    if (segmentIndex >= 0) {
                        // we are in first tiers (aka "segments")
                        context.initSegmentIndex(segmentIndex);
                    } else {
                        // we are in extra tiers
                        TierBulkData tierBulkData = tierBulkOffsets.get(bulkIndex);
                        long tierBaseAddr = tierAddr(tierBulkData, tierIndexOffsetWithinBulk);
                        long tierCountersAreaAddr = tierBaseAddr + tierHashLookupOuterSize;
                        context.initSegmentIndex(
                                TierCountersArea.segmentIndex(tierCountersAreaAddr));
                        int tier = TierCountersArea.tier(tierCountersAreaAddr);
                        long tierIndex = actualSegments +
                                (bulkIndex << log2TiersInBulk) + tierIndexOffsetWithinBulk + 1;
                        context.initSegmentTier(tier, tierIndex, tierBaseAddr);
                    }

                    context.updateLock().lock();

                    if (entryIsStillDirty(entryPos)) {
                        context.readExistingEntry(entryPos);
                        ReplicableEntry entry = (ReplicableEntry) context.entryForIteration();
                        callback.onEntry(entry, chronicleId);
                        somethingSentOnThisIteration = true;
                        clearEntry(entryPos);
                        return true;
                    }
                    // If the entryPos was already cleared by another thread while we were trying
                    // to obtain the segment lock (for example, during relocation()), go and pick
                    // up the next one (the next iteration of the `while (true)` loop)
                }
            }
        }

        private boolean entryIsStillDirty(long entryPos) {
            return tierModIterFrame.get(nativeAccess(), null, tierBitSetAddr, entryPos);
        }

        private void clearEntry(long entryPos) {
            tierModIterFrame.clear(nativeAccess(), null, tierBitSetAddr, entryPos);
        }

        @Override
        public void dirtyEntries(long fromTimeStamp) {
            try (CompiledReplicatedMapIterationContext<K, V, R> c = iterationContext()) {
                // Iterate over all the segments and raise the bits in this modification iterator
                // that correspond to entries which need to be bootstrapped: entries originating
                // from other nodes, or entries with a timestamp at or after fromTimeStamp
                boolean debugEnabled = LOG.isDebugEnabled();
                for (int segmentIndex = 0; segmentIndex < actualSegments; segmentIndex++) {
                    c.initSegmentIndex(segmentIndex);
                    c.forEachSegmentReplicableEntry(e -> {
                        if (debugEnabled) {
                            LOG.debug("Bootstrap entry: id {}, key {}, value {}", localIdentifier,
                                    c.key(), c.value());
                        }
                        // Bizarrely, the next line causes an NPE in the JDT compiler
                        //assert re.originTimestamp() > 0L;
                        if (debugEnabled) {
                            LOG.debug("Bootstrap decision: bs ts: {}, entry ts: {}, " +
                                            "entry id: {}, local id: {}",
                                    fromTimeStamp, e.originTimestamp(),
                                    e.originIdentifier(), localIdentifier);
                        }
                        // TODO currently, all entries not originating from the current node
                        // are bootstrapped. This could be optimized, but that requires generating
                        // a unique connection id, to distinguish two ChronicleMap instances
                        // reconnecting from a fresh Map start-up
                        if (e.originIdentifier() != localIdentifier ||
                                e.originTimestamp() >= fromTimeStamp) {
                            raiseChange0(c.tierIndex(), c.pos());
                        }
                    });
                }
            }
        }

        void raiseChange0(long tierIndex, long pos) {
            if (tierIndex <= actualSegments) {
                long segmentIndex = tierIndex - 1;
                long offsetToTierBitSet = segmentIndex * tierModIterBitSetOuterSize;
                raiseChangeInSegment(offsetToTierBitSet, pos);
            } else {
                long extraTierIndex = tierIndex - 1 - actualSegments;
                int bulkIndex = (int) (extraTierIndex >> log2TiersInBulk);
                long offsetToTierBitSet =
                        (extraTierIndex & (tiersInBulk - 1)) * tierModIterBitSetOuterSize;
                raiseChangeInTierBulk(bulkIndex, offsetToTierBitSet, pos);
            }
        }

        void dropChange0(long tierIndex, long pos) {
            if (tierIndex <= actualSegments) {
                long segmentIndex = tierIndex - 1;
                long offsetToTierBitSet = segmentIndex * tierModIterBitSetOuterSize;
                dropChangeInSegment(offsetToTierBitSet, pos);
            } else {
                long extraTierIndex = tierIndex - 1 - actualSegments;
                int bulkIndex = (int) (extraTierIndex >> log2TiersInBulk);
                long offsetToTierBitSet =
                        (extraTierIndex & (tiersInBulk - 1)) * tierModIterBitSetOuterSize;
                dropChangeInTierBulk(bulkIndex, offsetToTierBitSet, pos);
            }
        }

        public void clearRange0(long tierIndex, long pos, long endPosExclusive) {
            if (tierIndex <= actualSegments) {
                long segmentIndex = tierIndex - 1;
                long offsetToTierBitSet = segmentIndex * tierModIterBitSetOuterSize;
                tierModIterFrame.clearRange(nativeAccess(), null,
                        segmentBitSetsAddr + offsetToTierBitSet, pos, endPosExclusive);
            } else {
                long extraTierIndex = tierIndex - 1 - actualSegments;
                int bulkIndex = (int) (extraTierIndex >> log2TiersInBulk);
                long offsetToTierBitSet =
                        (extraTierIndex & (tiersInBulk - 1)) * tierModIterBitSetOuterSize;
                TierBulkData tierBulkData = tierBulkOffsets.get(bulkIndex);
                long bitSetAddr = bitSetsAddr(tierBulkData) + offsetToTierBitSet;
                tierModIterFrame.clearRange(nativeAccess(), null, bitSetAddr, pos, endPosExclusive);
            }
        }
    }
}