from pyspark.ml.classification import LinearSVC
from pyspark.sql import SparkSession
# Build (or reuse) the Spark session used by this example.
spark = (
    SparkSession.builder
    .appName("linearSVC Example")
    .getOrCreate()
)

try:
    # Load the training data, stored in LIBSVM format.
    training = spark.read.format("libsvm").load("sample_libsvm_data.txt")

    # Notes on LinearSVC parameters that are NOT set below:
    # threshold: the threshold used for binary classification.
    # aggregationDepth: this parameter shows up in many places. treeAggregate
    # is more efficient than aggregate because it avoids sending the results
    # of all partitions to the aggregating side at once (which may run out of
    # memory, or there may be too many partitions); instead it first combines
    # and merges partition results, and only then passes them on.
    lsvc = LinearSVC(maxIter=10, regParam=0.1)

    # Fit the model
    lsvcModel = lsvc.fit(training)

    # Print the coefficients and intercept for linear SVC
    print("Coefficients: " + str(lsvcModel.coefficients))
    print("Intercept: " + str(lsvcModel.intercept))
finally:
    # Always release the session's resources, even if loading or fitting fails.
    spark.stop()
# (Web-page residue: "Reader comments" heading -- not part of the example.)