美文网首页
Flink清理状态异常排查

Flink清理状态异常排查

作者: Jorvi | 来源:发表于2020-09-03 20:35 被阅读0次

    1. 异常信息

    Exception in thread "main" org.apache.flink.runtime.client.JobExecutionException: Job execution failed.
        at org.apache.flink.runtime.jobmaster.JobResult.toJobExecutionResult(JobResult.java:146)
        at org.apache.flink.runtime.minicluster.MiniCluster.executeJobBlocking(MiniCluster.java:638)
        at org.apache.flink.streaming.api.environment.LocalStreamEnvironment.execute(LocalStreamEnvironment.java:123)
        at org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.execute(StreamExecutionEnvironment.java:1509)
        at org.apache.flink.streaming.api.scala.StreamExecutionEnvironment.execute(StreamExecutionEnvironment.scala:645)
        at org.learn.StateWordCount$.main(StateWordCount.scala:50)
        at org.learn.StateWordCount.main(StateWordCount.scala)
    Caused by: TimerException{java.util.ConcurrentModificationException}
        at org.apache.flink.streaming.runtime.tasks.SystemProcessingTimeService$TriggerTask.run(SystemProcessingTimeService.java:288)
        at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
        at java.util.concurrent.FutureTask.run(FutureTask.java:266)
        at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)
        at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)
    Caused by: java.util.ConcurrentModificationException
        at java.util.HashMap$HashIterator.nextNode(HashMap.java:1442)
        at java.util.HashMap$KeyIterator.next(HashMap.java:1466)
        at org.learn.function.WordCountProcessFunction.onTimer(WordCountProcessFunction.scala:43)
        at org.apache.flink.streaming.api.operators.KeyedProcessOperator.invokeUserFunction(KeyedProcessOperator.java:94)
        at org.apache.flink.streaming.api.operators.KeyedProcessOperator.onProcessingTime(KeyedProcessOperator.java:78)
        at org.apache.flink.streaming.api.operators.InternalTimerServiceImpl.onProcessingTime(InternalTimerServiceImpl.java:239)
        at org.apache.flink.streaming.runtime.tasks.SystemProcessingTimeService$TriggerTask.run(SystemProcessingTimeService.java:285)
        ... 7 more
        
    

    报错位置是 org.learn.function.WordCountProcessFunction.onTimer(WordCountProcessFunction.scala:43)

    报错原因是 java.util.ConcurrentModificationException

    2. 代码

    package org.learn.function
    
    import org.apache.flink.api.common.state.{MapState, MapStateDescriptor}
    import org.apache.flink.configuration.Configuration
    import org.apache.flink.streaming.api.functions.KeyedProcessFunction
    import org.apache.flink.util.Collector
    
    class WordCountProcessFunction extends KeyedProcessFunction[String, (String, Int), (String, Int)] {
      // Accumulates a running (key, sum) tuple in MapState per tuple's first field; clears that state when a timer fires.
      private var mapState: MapState[String, (String, Int)] = _ // value._1 -> (value._1, running sum of value._2)
      private var timerState: MapState[Long, Long] = _ // timestamp -> same timestamp, written in processElement
      // Obtains both MapState handles from the runtime context; the descriptor names are "valueStateDesc" and "timerStateDesc".
      override def open(parameters: Configuration): Unit = {
        var mapStateDesc = new MapStateDescriptor[String, (String, Int)]("valueStateDesc", classOf[String], classOf[(String, Int)])
        mapState = getRuntimeContext.getMapState(mapStateDesc)
    
        val timerStateDesc = new MapStateDescriptor[Long, Long]("timerStateDesc", classOf[Long], classOf[Long])
        timerState = getRuntimeContext.getMapState(timerStateDesc)
      }
      // Adds value._2 to the sum stored under value._1; on first sight of value._1 also registers a timer. Emits nothing to `out`.
      override def processElement(value: (String, Int), ctx: KeyedProcessFunction[String, (String, Int), (String, Int)]#Context, out: Collector[(String, Int)]): Unit = {
    
        var currentState: (String, Int) = mapState.get(value._1)
        if (null == currentState) { // no entry stored yet for value._1
          currentState = (value._1, 0)
    
          // TTL time
          val ttlTime: Long = System.currentTimeMillis() - 30 * 1000 // set a time in the past (now minus 30 seconds)
          ctx.timerService().registerProcessingTimeTimer(ttlTime)
          timerState.put(ttlTime, ttlTime)
          timerState.put(ttlTime - 10, ttlTime - 10) // second entry 10 ms earlier; no timer is registered for this one
        }
    
        var newState: (String, Int) = (currentState._1, currentState._2 + value._2)
        mapState.put(value._1, newState)
      }
      // Timer callback: prints every timerState entry, removes entries whose key is before the current time, then clears mapState.
      override def onTimer(timestamp: Long, ctx: KeyedProcessFunction[String, (String, Int), (String, Int)]#OnTimerContext, out: Collector[(String, Int)]): Unit = {
    
        System.out.println("clear..." + " timestamp: " + timestamp + " currentTime: " + System.currentTimeMillis() + " timerState: ")
        val iter = timerState.keys().iterator()
        while (iter.hasNext) {
          val key = iter.next() // the stack trace reports ConcurrentModificationException from this call
          System.out.println("key: " + key + " value: " + timerState.get(key))
          if (key < System.currentTimeMillis()) {
            timerState.remove(key) // NOTE(review): removes through the state while `iter` is still iterating its keys
          }
        }
    
        mapState.clear()
      }
    }
    
    

    第 43 行:val key = iter.next()

    错误原因:利用迭代器遍历 map 的过程中,如果直接调用 map.remove(Object key) 移除元素,那么下一次调用迭代器的 next() 方法时,就会抛出 java.util.ConcurrentModificationException 异常。

    改正方法:把循环体中的 timerState.remove(key) 换成迭代器自身的 remove 方法 iter.remove() 来做移除操作,则不会抛出该异常。

    3. 源码

    从异常堆栈可以看到,抛出异常的是 java.util.HashMap$HashIterator.nextNode(),因此下面以 HashMap 为例,看看源码。

    • 进入java.util.HashMap.java

      public class HashMap<K,V> extends AbstractMap<K,V>
          implements Map<K,V>, Cloneable, Serializable {
          
          
          // HashMap's remove method.
          // Delegates to removeNode(...) with value = null, matchValue = false and movable = true.
          // Returns the removed node's value, or null when removeNode(...) returns null.
          public V remove(Object key) {
              Node<K,V> e;
              return (e = removeNode(hash(key), key, null, false, true)) == null ?
                  null : e.value;
          }
          
          
          // Locates the node for (hash, key) in the table and unlinks it.
          //   hash       - hash used to select the bucket: tab[(n - 1) & hash]
          //   key        - key to look up (compared by reference, then by equals)
          //   value      - only compared when matchValue is true
          //   matchValue - when true, the node is removed only if its value equals `value`
          //   movable    - passed through to TreeNode.removeTreeNode
          // Returns the removed node, or null if nothing was removed.
          // Every successful removal increments modCount, which is what the iterators check against.
          final Node<K,V> removeNode(int hash, Object key, Object value,
                                     boolean matchValue, boolean movable) {
              Node<K,V>[] tab; Node<K,V> p; int n, index;
              // Proceed only if the table exists, is non-empty and the target bucket has a head node p.
              if ((tab = table) != null && (n = tab.length) > 0 &&
                  (p = tab[index = (n - 1) & hash]) != null) {
                  Node<K,V> node = null, e; K k; V v;
                  // Case 1: the bucket's head node is the match.
                  if (p.hash == hash &&
                      ((k = p.key) == key || (key != null && key.equals(k))))
                      node = p;
                  else if ((e = p.next) != null) {
                      // Case 2: the bucket head is a TreeNode, so look the node up via getTreeNode.
                      if (p instanceof TreeNode)
                          node = ((TreeNode<K,V>)p).getTreeNode(hash, key);
                      else {
                          // Case 3: walk the linked list; p trails e so that p is the predecessor of the match.
                          do {
                              if (e.hash == hash &&
                                  ((k = e.key) == key ||
                                   (key != null && key.equals(k)))) {
                                  node = e;
                                  break;
                              }
                              p = e;
                          } while ((e = e.next) != null);
                      }
                  }
                  // Remove only if a node was found and (when matchValue is set) its value matches.
                  if (node != null && (!matchValue || (v = node.value) == value ||
                                       (value != null && value.equals(v)))) {
                      if (node instanceof TreeNode)
                          ((TreeNode<K,V>)node).removeTreeNode(this, tab, movable);
                      else if (node == p)
                          tab[index] = node.next;   // match is the bucket head: the bucket now starts at its successor
                      else
                          p.next = node.next;       // match is inside the list: bypass it from its predecessor
                      ++modCount;                   // structural modification counter; iterators compare against it
                      --size;
                      afterNodeRemoval(node);
                      return node;
                  }
              }
              return null;
          }
          
          
        // Key iterator: next() returns the key of the node produced by HashIterator.nextNode(),
        // so it inherits nextNode()'s modCount check.
        final class KeyIterator extends HashIterator
              implements Iterator<K> {
              public final K next() { return nextNode().key; }
          }
      
      
          // Inner class.
          // Base iterator over the table's nodes. It snapshots modCount at construction and throws
          // ConcurrentModificationException from nextNode()/remove() when modCount no longer matches that snapshot.
          abstract class HashIterator {
              Node<K,V> next;        // next entry to return
              Node<K,V> current;     // current entry
              int expectedModCount;  // for fast-fail
              int index;             // current slot

              HashIterator() {
                  expectedModCount = modCount; // snapshot of the map's modification counter
                  Node<K,V>[] t = table;
                  current = next = null;
                  index = 0;
                  if (t != null && size > 0) { // advance to first entry
                      do {} while (index < t.length && (next = t[index++]) == null);
                  }
              }

              public final boolean hasNext() {
                  return next != null;
              }

              // Returns the pending node and advances `next` to the following node,
              // moving on to later table slots when the current chain is exhausted.
              final Node<K,V> nextNode() {
                  Node<K,V>[] t;
                  Node<K,V> e = next;
                  // Fail fast: modCount has changed since the snapshot was taken or last re-synced.
                  if (modCount != expectedModCount)
                      throw new ConcurrentModificationException();
                  if (e == null)
                      throw new NoSuchElementException();
                  if ((next = (current = e).next) == null && (t = table) != null) {
                      do {} while (index < t.length && (next = t[index++]) == null);
                  }
                  return e;
              }

              // The iterator's remove method.
              // Removes the node last returned by nextNode() and then re-syncs expectedModCount,
              // so the following nextNode() call passes the modCount check.
              public final void remove() {
                  Node<K,V> p = current;
                  if (p == null)
                      throw new IllegalStateException();
                  if (modCount != expectedModCount)
                      throw new ConcurrentModificationException();
                  current = null;
                  K key = p.key;
                  removeNode(hash(key), key, null, false, false);
                  expectedModCount = modCount; // absorb the ++modCount performed by removeNode
              }
          }
      
      1. 调用迭代器的 next() 方法,进而调用 nextNode() 方法
      2. nextNode() 方法中会进行判断,如果 modCount != expectedModCount,则抛出java.util.ConcurrentModificationException 异常
      3. 如果调用 HashMap.remove() 方法,则进而会调用 removeNode() 方法,在 removeNode() 方法的最后会执行 ++modCount,而迭代器中的 expectedModCount 并没有同步更新,此时后面再调用迭代器的 next() 方法时,就会因为 modCount != expectedModCount 而抛出 java.util.ConcurrentModificationException 异常
      4. 如果调用迭代器的 remove() 方法,该方法内部同样会调用 removeNode(),但最后会执行 expectedModCount = modCount 重新同步两者,此时后面再调用迭代器的 next() 方法时,不会抛出异常

    相关文章

      网友评论

          本文标题:Flink清理状态异常排查

          本文链接:https://www.haomeiwen.com/subject/gugosktx.html