Question

I'm working with a 10-node Infinispan cluster used as a Hibernate Search backend. Our servers are running TC server 2.5 (tomcat 6.0.32) on Java 1.6_24. We are using jGroups 2.12.1.3 for handling cluster cache writes from each node, and for multicast UDP transport.

When we launch 3+ nodes in our cluster, eventually one of the nodes begins to log replication timeouts. We've observed the same result whether we configure Infinispan for replication or for distribution cache modes. Although the rest of the cluster remains stable, the failing node becomes essentially unusable for search.

Our configuration:

Infinispan:

<?xml version="1.0" encoding="UTF-8"?>
 <infinispan
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="urn:infinispan:config:5.0 http://www.infinispan.org/schemas/infinispan-config-5.0.xsd"
    xmlns="urn:infinispan:config:5.0">
<!-- Cache-manager-wide settings: JMX statistics, cluster transport, and shutdown hook behavior. -->
<global>
    <!-- Global JMX statistics are enabled under the cache manager name "HibernateSearch". -->
    <globalJmxStatistics
        enabled="true"
        cacheManagerName="HibernateSearch"
        allowDuplicateDomains="true" />
    <!-- Cluster transport. The "configurationFile" property points at infinispan-udp.cfg.xml,
         presumably the jGroups (UDP) configuration quoted later in this post.
         NOTE(review): distributedSyncTimeout="50000" is presumably milliseconds; confirm
         against the Infinispan 5.0 schema. -->
    <transport
        clusterName="HibernateSearch-Infinispan-cluster-MT"
        distributedSyncTimeout="50000">
        <properties>
            <property name="configurationFile" value="infinispan-udp.cfg.xml" />
        </properties>
    </transport>
    <!-- hookBehavior="DONT_REGISTER": NOTE(review) presumably the application is expected
         to stop the cache manager itself; confirm. -->
    <shutdown
        hookBehavior="DONT_REGISTER" />
</global>


<!-- Default cache settings.
     NOTE(review): named caches presumably inherit these values unless they override them;
     confirm for Infinispan 5.0. -->
<default>
    <!-- Locking: lockAcquisitionTimeout of 20000, write-skew check off, lock striping off. -->
    <locking
        lockAcquisitionTimeout="20000"
        writeSkewCheck="false"
        concurrencyLevel="5000"
        useLockStriping="false" />
    <!-- NOTE(review): enabled="false" is set together with storeValuesAsBinary="true";
         confirm which of the two takes effect. -->
    <storeAsBinary storeKeysAsBinary="false" storeValuesAsBinary="true"
        enabled="false" />
    <invocationBatching
        enabled="true" />
    <!-- Replication mode with a <sync> element. replTimeout="50000" is presumably the limit
         behind the "Replication timeout" error quoted in this post (TODO confirm the unit
         is milliseconds). -->
    <clustering
        mode="replication">
        <!-- In-memory state is fetched (fetchInMemoryState="true") and always provided
             (alwaysProvideInMemoryState="true"). -->
        <stateRetrieval
            timeout="60000"
            logFlushTimeout="65000"
            fetchInMemoryState="true"
            alwaysProvideInMemoryState="true" />
        <sync
            replTimeout="50000" />
        <l1 enabled="false" />
    </clustering>
    <jmxStatistics
        enabled="true" />
    <!-- Eviction strategy is NONE with maxEntries="-1"; expiration maxIdle is "-1".
         NOTE(review): -1 presumably means "unlimited / never"; confirm. -->
    <eviction
        maxEntries="-1"
        strategy="NONE" />
    <expiration
        maxIdle="-1" />
</default>


<!-- Named cache "LuceneIndexesMetadata": replication mode with a <sync> element,
     backed by a JDBM cache store. -->
<namedCache
    name="LuceneIndexesMetadata">
    <!-- logFlushTimeout here is 30000, whereas the <default> section uses 65000. -->
    <clustering
        mode="replication">
        <stateRetrieval
            fetchInMemoryState="true"
            logFlushTimeout="30000" />
        <sync
            replTimeout="50000" />
        <l1 enabled="false" />
    </clustering>
    <!-- Same locking values as the <default> section. -->
    <locking
        lockAcquisitionTimeout="20000"
        writeSkewCheck="false"
        concurrencyLevel="5000"
        useLockStriping="false" />
    <!-- JDBM cache store located at /usr/local/tc/.index/metadata, with preload="true".
         NOTE(review): shared="true" is set while the location is a local filesystem path;
         confirm that the store really is shared by all nodes. -->
    <loaders shared="true" preload="true">
        <loader class="org.infinispan.loaders.jdbm.JdbmCacheStore" fetchPersistentState="false" ignoreModifications="false" purgeOnStartup="false">
            <properties>
                <property name="location" value="/usr/local/tc/.index/metadata" />
            </properties>
        </loader>
    </loaders>
</namedCache>
<!-- Named cache "LuceneIndexesData": replication mode with a <sync> element,
     backed by a JDBM cache store. -->
<namedCache
    name="LuceneIndexesData">
    <!-- logFlushTimeout here is 30000, whereas the <default> section uses 65000. -->
    <clustering
        mode="replication">
        <stateRetrieval
            fetchInMemoryState="true"
            logFlushTimeout="30000" />
        <sync
            replTimeout="50000" />
        <l1 enabled="false" />
    </clustering>
    <!-- Same locking values as the <default> section. -->
    <locking
        lockAcquisitionTimeout="20000"
        writeSkewCheck="false"
        concurrencyLevel="5000"
        useLockStriping="false" />
    <!-- JDBM cache store located at /usr/local/tc/.index/data, with preload="true".
         NOTE(review): shared="true" is set while the location is a local filesystem path;
         confirm that the store really is shared by all nodes. -->
    <loaders shared="true" preload="true">
        <loader class="org.infinispan.loaders.jdbm.JdbmCacheStore" fetchPersistentState="false" ignoreModifications="false" purgeOnStartup="false">
            <properties>
                <property name="location" value="/usr/local/tc/.index/data" />
            </properties>
        </loader>
    </loaders>
</namedCache>
<!-- Named cache "LuceneIndexesLocking": replication mode with a <sync> element.
     Unlike the other two named caches, no <loaders> element is configured here.
     The stack trace quoted in this post fails in BaseLuceneLock.obtain; presumably that
     call writes to this cache (TODO confirm). -->
<namedCache
    name="LuceneIndexesLocking">
    <!-- logFlushTimeout here is 30000, whereas the <default> section uses 65000. -->
    <clustering
        mode="replication">
        <stateRetrieval
            fetchInMemoryState="true"
            logFlushTimeout="30000" />
        <sync
            replTimeout="50000" />
        <l1 enabled="false" />
    </clustering>
    <!-- Same locking values as the <default> section. -->
    <locking
        lockAcquisitionTimeout="20000"
        writeSkewCheck="false"
        concurrencyLevel="5000"
        useLockStriping="false" />
</namedCache>

jGroups (UDP):

<config xmlns="urn:org:jgroups"
        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        xsi:schemaLocation="urn:org:jgroups http://www.jgroups.org/schema/JGroups-2.12.xsd">
<!-- UDP transport settings.
     mcast_addr, mcast_port and ip_ttl use the ${name:value} form; presumably the part after
     the colon is the default used when the named system property is unset (TODO confirm).
     Receive buffers (20000000 unicast / 25000000 multicast) are much larger than the send
     buffers (640000 each).
     Both the regular and the OOB thread pool allow 2 to 30 threads, have
     queue_enabled="false" (so queue_max_size="100" presumably has no effect), and use
     rejection_policy="Discard".
     NOTE(review): with the queues disabled, messages are presumably discarded whenever all
     30 threads are busy; confirm against the JGroups 2.12 documentation. -->
<UDP
     mcast_addr="${jgroups.udp.mcast_addr:228.10.10.9}"
     mcast_port="${jgroups.udp.mcast_port:45599}"
     tos="8"
     ucast_recv_buf_size="20000000"
     ucast_send_buf_size="640000"
     mcast_recv_buf_size="25000000"
     mcast_send_buf_size="640000"
     loopback="true"
     discard_incompatible_packets="true"
     max_bundle_size="64000"
     max_bundle_timeout="30"
     ip_ttl="${jgroups.udp.ip_ttl:2}"
     enable_bundling="true"
     enable_diagnostics="false"
     thread_naming_pattern="pl"
     thread_pool.enabled="true"
     thread_pool.min_threads="2"
     thread_pool.max_threads="30"
     thread_pool.keep_alive_time="5000"
     thread_pool.queue_enabled="false"
     thread_pool.queue_max_size="100"
     thread_pool.rejection_policy="Discard"
     oob_thread_pool.enabled="true"
     oob_thread_pool.min_threads="2"
     oob_thread_pool.max_threads="30"
     oob_thread_pool.keep_alive_time="5000"
     oob_thread_pool.queue_enabled="false"
     oob_thread_pool.queue_max_size="100"
     oob_thread_pool.rejection_policy="Discard"
     />


And the errors we observe:

10-31-2011 13:53:02 ERROR Hibernate Search: Directory writer-3 interceptors.InvocationContextInterceptor: ISPN000136: Execution error
org.infinispan.util.concurrent.TimeoutException: Replication timeout for tc-cluster-0105-21082
          at org.infinispan.remoting.transport.AbstractTransport.parseResponseAndAddToResponseList(AbstractTransport.java:71)
          at org.infinispan.remoting.transport.jgroups.JGroupsTransport.invokeRemotely(JGroupsTransport.java:452)
          at org.infinispan.remoting.rpc.RpcManagerImpl.invokeRemotely(RpcManagerImpl.java:132)
          at org.infinispan.remoting.rpc.RpcManagerImpl.invokeRemotely(RpcManagerImpl.java:156)
          at org.infinispan.remoting.rpc.RpcManagerImpl.invokeRemotely(RpcManagerImpl.java:265)
          at org.infinispan.remoting.rpc.RpcManagerImpl.invokeRemotely(RpcManagerImpl.java:252)
          at org.infinispan.remoting.rpc.RpcManagerImpl.broadcastRpcCommand(RpcManagerImpl.java:235)
          at org.infinispan.remoting.rpc.RpcManagerImpl.broadcastRpcCommand(RpcManagerImpl.java:228)
          at org.infinispan.interceptors.ReplicationInterceptor.handleCrudMethod(ReplicationInterceptor.java:116)
          at org.infinispan.interceptors.ReplicationInterceptor.visitPutKeyValueCommand(ReplicationInterceptor.java:79)
          at org.infinispan.commands.write.PutKeyValueCommand.acceptVisitor(PutKeyValueCommand.java:77)
          at org.infinispan.interceptors.base.CommandInterceptor.invokeNextInterceptor(CommandInterceptor.java:119)
          at org.infinispan.interceptors.LockingInterceptor.visitPutKeyValueCommand(LockingInterceptor.java:294)
          at org.infinispan.commands.write.PutKeyValueCommand.acceptVisitor(PutKeyValueCommand.java:77)
          at org.infinispan.interceptors.base.CommandInterceptor.invokeNextInterceptor(CommandInterceptor.java:119)
          at org.infinispan.interceptors.base.CommandInterceptor.handleDefault(CommandInterceptor.java:133)
          at org.infinispan.commands.AbstractVisitor.visitPutKeyValueCommand(AbstractVisitor.java:60)
          at org.infinispan.commands.write.PutKeyValueCommand.acceptVisitor(PutKeyValueCommand.java:77)
          at org.infinispan.interceptors.base.CommandInterceptor.invokeNextInterceptor(CommandInterceptor.java:119)
          at org.infinispan.interceptors.TxInterceptor.enlistWriteAndInvokeNext(TxInterceptor.java:214)
          at org.infinispan.interceptors.TxInterceptor.visitPutKeyValueCommand(TxInterceptor.java:162)
          at org.infinispan.commands.write.PutKeyValueCommand.acceptVisitor(PutKeyValueCommand.java:77)
          at org.infinispan.interceptors.base.CommandInterceptor.invokeNextInterceptor(CommandInterceptor.java:119)
          at org.infinispan.interceptors.CacheMgmtInterceptor.visitPutKeyValueCommand(CacheMgmtInterceptor.java:114)
          at org.infinispan.commands.write.PutKeyValueCommand.acceptVisitor(PutKeyValueCommand.java:77)
          at org.infinispan.interceptors.base.CommandInterceptor.invokeNextInterceptor(CommandInterceptor.java:119)
          at org.infinispan.interceptors.InvocationContextInterceptor.handleAll(InvocationContextInterceptor.java:104)
          at org.infinispan.interceptors.InvocationContextInterceptor.handleDefault(InvocationContextInterceptor.java:64)
          at org.infinispan.commands.AbstractVisitor.visitPutKeyValueCommand(AbstractVisitor.java:60)
          at org.infinispan.commands.write.PutKeyValueCommand.acceptVisitor(PutKeyValueCommand.java:77)
          at org.infinispan.interceptors.base.CommandInterceptor.invokeNextInterceptor(CommandInterceptor.java:119)
          at org.infinispan.interceptors.BatchingInterceptor.handleDefault(BatchingInterceptor.java:77)
          at org.infinispan.commands.AbstractVisitor.visitPutKeyValueCommand(AbstractVisitor.java:60)
          at org.infinispan.commands.write.PutKeyValueCommand.acceptVisitor(PutKeyValueCommand.java:77)
          at org.infinispan.interceptors.InterceptorChain.invoke(InterceptorChain.java:274)
          at org.infinispan.CacheImpl.putIfAbsent(CacheImpl.java:524)
          at org.infinispan.CacheSupport.putIfAbsent(CacheSupport.java:74)
          at org.infinispan.lucene.locking.BaseLuceneLock.obtain(BaseLuceneLock.java:65)
          at org.apache.lucene.store.Lock.obtain(Lock.java:72)
          at org.apache.lucene.index.IndexWriter.<init>(IndexWriter.java:1097)
          at org.hibernate.search.backend.Workspace.createNewIndexWriter(Workspace.java:202)
          at org.hibernate.search.backend.Workspace.getIndexWriter(Workspace.java:180)
          at org.hibernate.search.backend.impl.lucene.PerDPQueueProcessor.run(PerDPQueueProcessor.java:103)
          at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
          at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
          at java.lang.Thread.run(Thread.java:662)

Because this error is so pervasive regardless of our topology or caching mode, we believe something in our configuration must be wrong. Can anyone recommend a fix?

Was it helpful?

Solution

It turns out we had a version clash between Infinispan and Hibernate Search. If you use Hibernate Search 3.4.1, you must use Infinispan 4.2.1; later versions may not work.

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top