集群安装
本文使用版本为 ignite v2.8.1
- 集群配置
默认安装 jdk 1.8
版本即可,集群每台主机上的 $IGNITE_HOME/config
目录下增加 default.xml
配置文件如下:
<?xml version="1.0" encoding="UTF-8"?>
<!-- Ignite node configuration (Spring XML). Placed as $IGNITE_HOME/config/default.xml on every host; all nodes must use identical settings or joins fail. -->
<beans xmlns="http://www.springframework.org/schema/beans"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:util="http://www.springframework.org/schema/util"
xsi:schemaLocation="
http://www.springframework.org/schema/beans
http://www.springframework.org/schema/beans/spring-beans.xsd
http://www.springframework.org/schema/util
http://www.springframework.org/schema/util/spring-util.xsd">
<bean id="grid.cfg" class="org.apache.ignite.configuration.IgniteConfiguration">
<!-- Peer class loading; must be set identically on every node, otherwise joining nodes are rejected (see the "common problems" section of this document).
     NOTE(review): original comment claimed the default is true; Ignite documents false as the default - confirm. -->
<property name="peerClassLoadingEnabled" value="true"/>
<!-- System thread pool size (max(8, total number of cores)) -->
<property name="systemThreadPoolSize" value="24"/>
<!-- Public thread pool size (max(8, total number of cores)) -->
<property name="publicThreadPoolSize" value="8"/>
<!-- Query thread pool size (max(8, total number of cores)) -->
<property name="queryThreadPoolSize" value="8"/>
<!-- Service thread pool size (max(8, total number of cores)) -->
<property name="serviceThreadPoolSize" value="8"/>
<!-- Striped pool size (max(8, total number of cores)) -->
<property name="stripedPoolSize" value="8"/>
<!-- Data streamer thread pool size (max(8, total number of cores)) -->
<property name="dataStreamerThreadPoolSize" value="8"/>
<!-- Rebalance thread pool size -->
<property name="rebalanceThreadPoolSize" value="8"/>
<!-- Enable user authentication (default false); once enabled, the default credentials are ignite/ignite -->
<property name="authenticationEnabled" value="true"/>
<!-- Marshaller used for object serialization.
     NOTE(review): this class lives in an internal package and binary marshalling is already Ignite's default; confirm this explicit override is needed. -->
<property name="marshaller">
<bean class="org.apache.ignite.internal.binary.BinaryMarshaller" />
</property>
<!-- Data storage (native persistence) configuration -->
<property name="dataStorageConfiguration">
<bean class="org.apache.ignite.configuration.DataStorageConfiguration">
<!-- Concurrency level; tune to the actual workload -->
<property name="concurrencyLevel" value="200"/>
<!-- Memory page size; match the OS page size (getconf PAGESIZE) -->
<property name="pageSize" value="#{4 * 1024}"/>
<!-- Size of the WAL (Write Ahead Log) segment -->
<property name="walSegmentSize" value="#{1024 * 1024 * 1024}"/>
<!--In our experience LOG_ONLY is a good compromise between durability and performance.-->
<property name="walMode" value="LOG_ONLY"/>
<!-- Enable write throttling. -->
<property name="writeThrottlingEnabled" value="true"/>
<!-- Checkpointing frequency (ms) -->
<!--Checkpointing frequency which is a minimal interval when the dirty pages will be written to the Persistent Store.-->
<property name="checkpointFrequency" value="180000"/>
<!-- Default data region; data stays in memory only unless persistence is enabled, which is done below -->
<property name="defaultDataRegionConfiguration">
<bean class="org.apache.ignite.configuration.DataRegionConfiguration">
<!-- Persist this region to disk (true enables Ignite native persistence) -->
<property name="persistenceEnabled" value="true"/>
<property name="name" value="vehicle_Region"/>
<!-- 2G initial region size -->
<property name="initialSize" value="#{2L * 1024 * 1024 * 1024}" />
<!-- 30G maximum region size -->
<property name="maxSize" value="#{30L * 1024 * 1024 * 1024}" />
<!-- 8G checkpoint page buffer size -->
<property name="checkpointPageBufferSize" value="#{8L *1024* 1024 * 1024L}" />
</bean>
</property>
<!-- Persistent store directories: data files, WAL archive and WAL -->
<property name="storagePath" value="/data/ignite/storage" />
<property name="walArchivePath" value="/data/ignite/walArchive" />
<property name="walPath" value="/data/ignite/wal" />
</bean>
</property>
<!-- Metrics log output frequency; 0 disables the periodic metrics log line -->
<property name="metricsLogFrequency" value="0"/>
<!-- Failure detection timeout (ms) -->
<property name="failureDetectionTimeout" value="#{60 * 60 * 1000}"/>
<!-- Timeout for detecting blocked system workers; default 10s -->
<property name="systemWorkerBlockedTimeout" value="#{60 * 60 * 1000}"/>
<!-- Restart the node process automatically on critical failure -->
<property name="failureHandler">
<bean class="org.apache.ignite.failure.RestartProcessFailureHandler"/>
</property>
<!-- IGFS (in-memory file system) configuration.
     NOTE(review): IGFS was dropped from Ignite during the 2.x line; confirm FileSystemConfiguration still exists in v2.8.1. -->
<property name="fileSystemConfiguration">
<list>
<bean class="org.apache.ignite.configuration.FileSystemConfiguration">
<!-- Distinguished file system name. -->
<property name="name" value="igfs1" />
<property name="blockSize" value="#{128 * 1024}"/>
<property name="perNodeBatchSize" value="512"/>
<property name="perNodeParallelBatchCount" value="16"/>
<property name="prefetchBlocks" value="32"/>
<!-- Set default mode. -->
<property name="defaultMode" value="DUAL_ASYNC" />
<!-- Configure '/tmp' and all child paths to work in PRIMARY mode. -->
<property name="pathModes">
<map>
<entry key="/tmp/.*" value="PRIMARY"/>
</map>
</property>
</bean>
</list>
</property>
<!-- Cache configuration -->
<property name="cacheConfiguration">
<bean class="org.apache.ignite.configuration.CacheConfiguration">
<!-- Set a cache name. -->
<property name="name" value="memdb2"/>
<!-- Set asynchronous rebalancing. -->
<property name="rebalanceMode" value="ASYNC"/>
<!-- Cache mode: partitioned, with 2 backup copies per partition -->
<property name="cacheMode" value="PARTITIONED"/>
<property name="backups" value="2"/>
<!-- Write synchronization mode: -->
<!-- PRIMARY_SYNC (default; a write completes once the primary node has it, so reads from backup nodes may return stale data) -->
<!-- FULL_SYNC (a write returns only after both primary and backup nodes have written it, keeping all copies identical) -->
<!-- FULL_ASYNC (a write returns without waiting for primary or backup nodes; even primary reads may be stale) -->
<property name="writeSynchronizationMode" value="PRIMARY_SYNC"/>
<!-- Partition loss handling: -->
<!-- IGNORE (default; Ignite ignores partition loss, clears loss-related state and does not fire EVT_CACHE_REBALANCE_PART_DATA_LOST) -->
<!-- READ_WRITE_ALL (all reads and writes proceed as if no partition was lost) -->
<!-- READ_WRITE_SAFE (reads/writes on surviving partitions work; operations on lost partitions fail with an exception) -->
<!-- READ_ONLY_ALL (reads on lost and healthy partitions are allowed; all writes fail with an exception) -->
<!-- READ_ONLY_SAFE (all writes and any reads of lost partitions fail with an exception; reads of healthy partitions are allowed) -->
<property name="partitionLossPolicy" value="READ_WRITE_ALL"/>
<!-- enable disk page compression for this cache -->
<property name="diskPageCompression" value="SNAPPY"/>
<!-- optionally set the compression level -->
<!-- NOTE(review): compression levels are documented for ZSTD/LZ4; confirm SNAPPY honors a level setting -->
<property name="diskPageCompressionLevel" value="10"/>
</bean>
</property>
<!-- Set batch size. -->
<property name="rebalanceBatchSize" value="#{1 * 1024 * 1024 * 1024}"/>
<!-- Set throttle interval. -->
<property name="rebalanceThrottle" value="100"/>
<!--
Explicitly configure TCP discovery SPI to provide list of initial nodes.
Nodes discover each other through this static IP list; a single-node setup only needs its own address.
-->
<property name="discoverySpi">
<bean class="org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi">
<property name="ipFinder">
<bean class="org.apache.ignite.spi.discovery.tcp.ipfinder.vm.TcpDiscoveryVmIpFinder">
<property name="addresses">
<!-- List every cluster node address here -->
<list>
<value>172.21.18.16:47500..47509</value>
<value>172.21.18.17:47500..47509</value>
<value>172.21.18.19:47500..47509</value>
</list>
</property>
</bean>
</property>
</bean>
</property>
</bean>
</beans>
注
创建数据存放目录 mkdir -p /data/ignite/{storage,walArchive,wal}
- 启动集群
在每台集群主机上执行如下命令即可
# 安装 jdk 1.8
$ wget http://repo.hdp.com/jdk-8u211-linux-x64.tar.gz
$ tar xvf jdk-8u211-linux-x64.tar.gz
$ mkdir -p /usr/jdk64 && mv jdk1.8.0_211 /usr/jdk64/
# 添加环境变量
$ echo "export JAVA_HOME=/usr/jdk64/jdk1.8.0_211" >> /etc/profile
$ echo "export IGNITE_HOME=/root/apache-ignite" >> /etc/profile
# 使环境变量在当前会话立即生效
$ source /etc/profile
# 启动服务
$ ./bin/ignite.sh config/default.xml &
# 激活
$ ./bin/control.sh --user ignite --password ignite --activate
# 查看状态
$ ./bin/control.sh --user ignite --password ignite --state
# 查看集群基线
$ ./bin/control.sh --user ignite --password ignite --baseline
# 从集群中手动删除节点
$ ./bin/control.sh --user ignite --password ignite --baseline remove 75a706e0-f40f-4418-84d9-93e06b853e45
# 使用 sqlline 访问数据库,用户名默认为 ignite/ignite
$ ./bin/sqlline.sh --color=true --verbose=true -n ignite -p ignite -u jdbc:ignite:thin://172.19.3.97,172.19.3.98,172.19.3.99/
注
务必关闭服务器防火墙,否则会出现集群找不到其他节点信息
注
优化参考方案:
常见问题
- 配置不一样导致
Remote node has peer class loading enabled flag different from local [locId8=d9ec5b41, locPeerClassLoading=true, rmtId8=3d11ed2e, rmtPeerClassLoading=false
注
将所有节点的配置改为一致即可, 上述错误主要由于各节点的 peerClassLoadingEnabled
配置不一致(部分节点为 true、部分为 false)导致
注
配置不一致也会导致如下错误:Caused by: class org.apache.ignite.spi.IgniteSpiException: BaselineTopology of joining node (2af934c1-1936-4c2b-8368-7107a22045fc) is not compatible with BaselineTopology in the cluster. Branching history of cluster BlT ([953524018]) doesn't contain branching point hash of joining node BlT (310856721). Consider cleaning persistent storage of the node and adding it to the cluster again.
- 通过组播网络进行配置
<!--
Explicitly configure TCP discovery SPI to provide list of initial nodes.
Discovery here uses a multicast group, optionally combined with a static address list; a single-node setup only needs its own address.
-->
<property name="discoverySpi">
<bean class="org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi">
<property name="ipFinder">
<bean class="org.apache.ignite.spi.discovery.tcp.ipfinder.multicast.TcpDiscoveryMulticastIpFinder">
<!-- Multicast group address -->
<property name="multicastGroup" value="228.10.10.157"/>
<!-- Optional static addresses of known cluster nodes -->
<property name="addresses">
<!-- List every cluster node address here -->
<list>
<value>172.28.0.195:47500..47509</value>
<value>172.28.0.196:47500..47509</value>
<value>172.28.0.197:47500..47509</value>
</list>
</property>
</bean>
</property>
</bean>
</property>
- 通过
zookeeper
进行配置
<!-- ZooKeeper-based discovery SPI -->
<property name="discoverySpi">
<bean class="org.apache.ignite.spi.discovery.zk.ZookeeperDiscoverySpi">
<!-- ZooKeeper connection string (host:port) -->
<property name="zkConnectionString" value="172.8.4.75:2181"/>
<property name="sessionTimeout" value="30000"/>
<!-- Root znode path used by Ignite inside ZooKeeper -->
<property name="zkRootPath" value="/apacheIgnite"/>
<property name="joinTimeout" value="10000"/>
</bean>
</property>
-
GC
优化
在 bin/ignite.sh
脚本中,对 JVM GC
进行优化如下:
#
# JVM options. See http://java.sun.com/javase/technologies/hotspot/vmoptions.jsp for more details.
#
# ADD YOUR/CHANGE ADDITIONAL OPTIONS HERE
#
if [ -z "$JVM_OPTS" ] ; then
# An oversized heap prolongs GC pauses; pick a size that fits the workload.
JVM_OPTS="-Xms10g -Xmx10g -Xmn6g -server -XX:MaxMetaspaceSize=8G"
# NOTE: -XX:MaxDirectMemorySize should be walSegmentSize * 4, see
# https://apacheignite.readme.io/docs/durable-memory-tuning
# BUGFIX: append to JVM_OPTS instead of overwriting it; the original line
# discarded the heap settings configured just above.
JVM_OPTS="$JVM_OPTS -XX:MaxDirectMemorySize=4G"
fi
#
# Uncomment the following GC settings if you see spikes in your throughput due to Garbage Collection.
# -XX:+AlwaysPreTouch        pre-touch heap pages at startup to reduce promotion stalls
# -XX:+ScavengeBeforeFullGC  run a young-generation GC before any full GC
# -XX:+DisableExplicitGC     ignore System.gc() calls from application code
#
JVM_OPTS="$JVM_OPTS -XX:+UseG1GC"
JVM_OPTS="$JVM_OPTS -XX:+AlwaysPreTouch -XX:+ScavengeBeforeFullGC -XX:+DisableExplicitGC -XX:MaxGCPauseMillis=200 -XX:InitiatingHeapOccupancyPercent=45"
#
# GC logs
#
JVM_OPTS="$JVM_OPTS -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=../heapdump -XX:+ExitOnOutOfMemoryError"
JVM_OPTS="$JVM_OPTS -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -XX:+PrintAdaptiveSizePolicy"
JVM_OPTS="$JVM_OPTS -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=100M -Xloggc:../gc.log"
系统优化内容如下:
- 如果GC日志显示:
low user time, low system time, long GC pause
,那么一个原因就是GC
线程因为内核等待I/O
而卡住了,发生的原因基本是日志提交或者因为日志滚动的gzip
导致改变的文件系统刷新。
# 增加页面刷新到磁盘的频率,从默认的30秒到5秒
sysctl -w vm.dirty_writeback_centisecs=500
sysctl -w vm.dirty_expire_centisecs=500
# 检查并且降低‘swappiness’的设定来保护堆和匿名内存
sysctl -w vm.swappiness=10
# 关闭NUMA zone-reclaim优化
sysctl -w vm.zone_reclaim_mode=0
# 关闭transparent_hugepage
echo never > /sys/kernel/mm/redhat_transparent_hugepage/enabled
echo never > /sys/kernel/mm/redhat_transparent_hugepage/defrag
# 避免内存页面直接回收导致的长时间GC暂停,在Linux的最新内核版本中,
# 可以通过/proc/sys/vm/extra_free_kbytes设置
# 在wmark_min和wmark_low之间增加额外的字节来避免前述的延迟
sysctl -w vm.extra_free_kbytes=1240000
-
java
代码方式进行初始化
/**
 * Initializes an embedded Ignite node programmatically: configures static-IP
 * discovery and communication SPIs, durable memory (optionally persistent),
 * and a cache; then starts the node and runs a few insert/get/SQL examples
 * against the address cache.
 *
 * Reads instance fields (localIp, finder, localPort, communicationPort,
 * initSize, maxSize, persistence, model, expiry, storePath, logger, ignite)
 * that are assumed to be set elsewhere - TODO confirm against the declaring class.
 */
public void init() {
// Multicast discovery variant (uses the local ip/127.0.0.1; 'finder' holds other nodes' ip:port):
//TcpDiscoveryMulticastIpFinder ipFinder = new TcpDiscoveryMulticastIpFinder();
// Static-IP discovery: register this host's address plus the other known node(s).
TcpDiscoveryVmIpFinder ipFinder = new TcpDiscoveryVmIpFinder();//static IP finder
ipFinder.setAddresses(Arrays.asList(localIp, finder));
// Discovery listen port for this node; pinning a fixed port is recommended.
TcpDiscoverySpi spi = new TcpDiscoverySpi();
spi.setLocalPort(localPort);
spi.setLocalPortRange(0);
spi.setIpFinder(ipFinder);
// Local listen port for cluster data communication.
TcpCommunicationSpi ipCom = new TcpCommunicationSpi();
ipCom.setLocalPort(communicationPort);
ipCom.setMessageQueueLimit(32);//bound the message queue to avoid OOME from unbounded growth
// Durable (off-heap) memory used for cache data; keep consistent with -XX:MaxDirectMemorySize.
DataStorageConfiguration dsCfg = new DataStorageConfiguration();
DataRegionConfiguration drCfg = new DataRegionConfiguration();
drCfg.setInitialSize(initSize);
drCfg.setMaxSize(maxSize);
if (persistence) {
// Native persistence: cached data survives restarts and is not dropped under memory pressure.
// If the log shows "Page evictions starts", memory is insufficient and data spills heavily to
// disk (slow); consider a larger region.
drCfg.setPersistenceEnabled(true);
dsCfg.setWalMode(WALMode.LOG_ONLY);//tuning recommendation
dsCfg.setWriteThrottlingEnabled(true);//tuning recommendation, checkpoint-related
} else {
// Without persistence, evict the oldest pages once usage reaches the threshold (default 90%).
drCfg.setPageEvictionMode(DataPageEvictionMode.RANDOM_LRU);
}
dsCfg.setDefaultDataRegionConfiguration(drCfg);//default memory policy
// Cache configuration.
CacheConfiguration<Long, JetAddress> cacheCfg = new CacheConfiguration<>(ADDRESS_CACHE);
cacheCfg.setIndexedTypes(Long.class, JetAddress.class);
cacheCfg.setSqlFunctionClasses(IgniteFunction.class);
cacheCfg.setSqlSchema("PUBLIC");
if (model == 0) {
// PARTITIONED mode: each node holds a slice of the data.
cacheCfg.setCacheMode(CacheMode.PARTITIONED);
// One backup copy so data remains available when a single node shuts down.
cacheCfg.setBackups(1);
} else {
// REPLICATED mode: every node holds the full data set; no backups needed, better query performance.
cacheCfg.setCacheMode(CacheMode.REPLICATED);
}
// Cache entry expiry; value comes from the 'expiry' field (original note: default 10 minutes).
cacheCfg.setExpiryPolicyFactory(CreatedExpiryPolicy.factoryOf(new Duration(MINUTES, expiry)));
IgniteConfiguration igniteCfg = new IgniteConfiguration();
igniteCfg.setCommunicationSpi(ipCom);
igniteCfg.setDiscoverySpi(spi);
igniteCfg.setCacheConfiguration(cacheCfg);
igniteCfg.setWorkDirectory(storePath);//work directory where node data is stored
igniteCfg.setDataStorageConfiguration(dsCfg);
//Logger logger = LoggerFactory.getLogger("org.apache.ignite");
igniteCfg.setGridLogger(new Slf4jLogger(logger));
ignite = Ignition.start(igniteCfg);
ignite.active(true);//must block here waiting for cluster activation before using caches
logger.info("Cache example started.");
IgniteCache<Long, JetAddress> addrCache = ignite.getOrCreateCache(cacheCfg);
IgniteAtomicSequence atomicSequence = ignite.atomicSequence("JetAtomicSequence", 0, true);
logger.info("{}", atomicSequence.get());
long id = atomicSequence.incrementAndGet();
// Insert via SQL.
SqlFieldsQuery qry = new SqlFieldsQuery(
"insert into JetAddress (_key, id, province) values (?, ?, ?)");
addrCache.query(qry.setArgs(id, id, "浙江省"));
// Put via the cache API.
JetAddress ja2 = new JetAddress();
ja2.setId(atomicSequence.incrementAndGet());
ja2.setProvince("浙江省2");
addrCache.put(atomicSequence.get(), ja2);
// Get by key.
JetAddress ja = addrCache.get(id);
logger.info("Get province:{}", ja.getProvince());
// SqlFieldsQuery: select individual fields.
String sql = "select province from JetAddress where province like '浙%' limit 1000";
SqlFieldsQuery query = new SqlFieldsQuery(sql);
try (QueryCursor<List<?>> cursor = addrCache.query(query)) {
for (List<?> entry : cursor) {
logger.info("SqlFieldsQuery province:{}", entry.toString());
}
} catch (Exception e) {
logger.warn(e.getMessage(), e);
}
// SqlQuery using a custom SQL function (matchFun) in the predicate.
SqlQuery<Long, JetAddress> query2 = new SqlQuery<Long, JetAddress>(JetAddress.class, " matchFun(province, ?)=1");
try (QueryCursor<Cache.Entry<Long, JetAddress>> cursor = addrCache.query(query2.setArgs("浙"))) {
for (Entry<Long, JetAddress> entry : cursor) {
JetAddress jAddress = entry.getValue();
logger.info("SqlQuery province:{}", jAddress.getProvince());
}
} catch (Exception e) {
logger.warn(e.getMessage(), e);
}
logger.info("Cache example finished.");
}
上述代码节选自 Apache Ignite 2.3使用例子与问题总结
- 压缩包安装
注
ignite-compress
模块使能,需执行 cp -r $IGNITE_HOME/libs/optional/ignite-compress $IGNITE_HOME/libs/
进行压缩模块安装(ignite-compress 为目录,复制时需加 -r 递归选项)
参考
wal 日志使能
-
monit
保证服务稳定
在 /etc/monit.d/services.cfg
文件里追加如下内容即可:
# Monit watchdog for the Ignite node, matched by its startup class on the command line.
check process ignite
matching "org.apache.ignite.startup.cmdline.CommandLineStartup"
# Start with the same config file used throughout this document (config/default.xml).
start program = "/root/apache-ignite/bin/ignite.sh /root/apache-ignite/config/default.xml"
# FIX: the original stop command had broken nested quoting (an unescaped awk -F" "
# and nested single quotes inside a double-quoted string); pkill -f achieves the
# same result with valid quoting.
stop program = "/bin/bash -c 'pkill -TERM -f org.apache.ignite.startup.cmdline.CommandLineStartup'"
- 使用
systemd
服务 及 monit
保证服务稳定运行
编辑 vi /etc/systemd/system/ignite.service
,添加如下内容:
[Unit]
Description=Apache Ignite Service
After=network.target

[Service]
Type=simple
# FIX: systemd path settings must not be quoted; the quoted value was taken literally.
WorkingDirectory=/root/apache-ignite
PrivateDevices=yes
ProtectSystem=full
ExecReload=/bin/kill -HUP $MAINPID
KillMode=mixed
KillSignal=SIGTERM
TimeoutStopSec=10
# FIX: pass the cluster config file explicitly, matching the manual start command
# used earlier in this document.
ExecStart=/root/apache-ignite/bin/ignite.sh /root/apache-ignite/config/default.xml
SyslogIdentifier=Ignite
Restart=on-failure
RestartSec=5

[Install]
WantedBy=multi-user.target
Alias=ignite.service
执行 systemctl daemon-reload
加载服务,接下来修改 /etc/monit.d/services.cfg
配置文件并加载监控 monit reload
即可:
# Monit entry that drives the systemd unit defined above.
check process ignite
matching "org.apache.ignite.startup.cmdline.CommandLineStartup"
# FIX: monit syntax requires '=' after 'start program' / 'stop program'
# (consistent with the earlier monit snippet in this document).
start program = "/usr/bin/systemctl start ignite.service"
stop program = "/usr/bin/systemctl stop ignite.service"
- 新增设备报
IgniteCheckedException
异常
[17:44:21,138][SEVERE][main][IgniteKernal] Got exception while starting (will rollback startup routine).
class org.apache.ignite.IgniteCheckedException: Affinity key backups mismatch [cacheName=memdb2, localAffinityKeyBackups=2, remoteAffinityKeyBackups=1, rmtNodeId=9cbc7db4-9777-45ef-a1e1-bc54bd57e7fb] Fix cache configuration or set system property -DIGNITE_SKIP_CONFIGURATION_CONSISTENCY_CHECK=true.
at org.apache.ignite.internal.processors.cache.GridCacheUtils.throwIgniteCheckedException(GridCacheUtils.java:1684)
at org.apache.ignite.internal.processors.cache.GridCacheUtils.checkAttributeMismatch(GridCacheUtils.java:997)
at org.apache.ignite.internal.processors.cache.ClusterCachesInfo.checkCache(ClusterCachesInfo.java:485)
at org.apache.ignite.internal.processors.cache.ClusterCachesInfo.onKernalStart(ClusterCachesInfo.java:320)
at org.apache.ignite.internal.processors.cache.GridCacheProcessor.onKernalStart(GridCacheProcessor.java:660)
at org.apache.ignite.internal.IgniteKernal.start(IgniteKernal.java:1343)
at org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.start0(IgnitionEx.java:2045)
at org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.start(IgnitionEx.java:1703)
at org.apache.ignite.internal.IgnitionEx.start0(IgnitionEx.java:1117)
at org.apache.ignite.internal.IgnitionEx.startConfigurations(IgnitionEx.java:1035)
at org.apache.ignite.internal.IgnitionEx.start(IgnitionEx.java:921)
at org.apache.ignite.internal.IgnitionEx.start(IgnitionEx.java:820)
at org.apache.ignite.internal.IgnitionEx.start(IgnitionEx.java:690)
at org.apache.ignite.internal.IgnitionEx.start(IgnitionEx.java:659)
at org.apache.ignite.Ignition.start(Ignition.java:346)
at org.apache.ignite.startup.cmdline.CommandLineStartup.main(CommandLineStartup.java:300)
在启动参数中添加 -DIGNITE_SKIP_CONFIGURATION_CONSISTENCY_CHECK=true
参数即可(该异常根因是新节点缓存的 backups 配置与集群不一致,更稳妥的做法是将新节点配置改为与集群一致)
网友评论