发现了这个不错的小工具,标记下
/**
 * Builds a nullable Spark `StructField` for a column, choosing the Spark data type from the
 * fully-qualified Java class name of the column's values.
 *
 * @param name    name to give the resulting field
 * @param colType fully-qualified Java class name of the column values
 *                (e.g. `"java.lang.String"`)
 * @return a nullable `StructField` named `name` with the matching Spark type
 * @throws IllegalArgumentException if `colType` is not one of the class names handled below
 */
def createStructField(name: String, colType: String): StructField = {
  val dataType = colType match {
    case "java.lang.String"     => StringType
    case "java.lang.Integer"    => IntegerType
    case "java.lang.Long"       => LongType
    case "java.lang.Boolean"    => BooleanType
    case "java.lang.Double"     => DoubleType
    case "java.lang.Float"      => FloatType
    case "java.sql.Date"        => DateType
    // Time-of-day values are represented with TimestampType.
    case "java.sql.Time"        => TimestampType
    case "java.sql.Timestamp"   => TimestampType
    // NOTE(review): precision/scale are fixed at (10, 0) because this signature carries no
    // precision information; values with a fractional part will not keep it.
    case "java.math.BigDecimal" => DecimalType(10, 0)
    // Fail with a message that names the column instead of an anonymous MatchError.
    case other =>
      throw new IllegalArgumentException(
        s"Unsupported column class '$other' for column '$name'")
  }
  StructField(name, dataType, nullable = true)
}
/**
 * Converts a queried JDBC `ResultSet` into a Spark `DataFrame`.
 *
 * The schema is built by calling `createStructField` with each column's name and Java class
 * name taken from the result set metadata. Rows are read one at a time, buffered, and appended
 * to the accumulated DataFrame in batches; every row of the result set is kept, including
 * duplicates and SQL NULL values.
 *
 * @param rs the result set to read; it is iterated with `next()` until exhausted and is not
 *           closed by this method
 * @return a DataFrame holding all rows read from `rs`
 */
def createResultSetToDF(rs: ResultSet): DataFrame = {
  // Number of rows buffered before they are appended to the accumulated DataFrame.
  val batchSize = 100000

  val rsmd = rs.getMetaData
  val columnCount = rsmd.getColumnCount
  // JDBC column indexes are 1-based.
  val fields = (1 to columnCount).map { i =>
    createStructField(rsmd.getColumnName(i), rsmd.getColumnClassName(i))
  }
  val rowSchema = StructType(fields)

  val buffer = new ArrayList[Row]
  var totalDF = session.createDataFrame(new ArrayList[Row], rowSchema)
  while (rs.next()) {
    // Read by index through getObject: this yields null for SQL NULL (the primitive getters
    // would yield 0/false), is unambiguous when column names repeat, and needs no reflection
    // on the driver's ResultSet implementation class.
    val values = (1 to columnCount).map { i =>
      rs.getObject(i) match {
        // createStructField declares java.sql.Time columns as TimestampType, so hand Spark a
        // Timestamp rather than a Time.
        case t: java.sql.Time => new java.sql.Timestamp(t.getTime)
        case other            => other
      }
    }
    buffer.add(Row.fromSeq(values))
    if (buffer.size() >= batchSize) {
      // Plain union: rows must not be de-duplicated, otherwise the output would depend on
      // whether the result set happened to be large enough to trigger a flush.
      totalDF = totalDF.union(session.createDataFrame(buffer, rowSchema))
      buffer.clear()
    }
  }
  // Append whatever is left in the buffer (possibly nothing).
  totalDF.union(session.createDataFrame(buffer, rowSchema))
}
网友评论