Wednesday, February 25, 2015

How to resolve a java.lang.ClassNotFoundException error in an Apache Spark web app?

I am trying to run an Apache Spark MLlib example in a Spring web app. Apache Spark is installed on my server in standalone mode, and I already have the master and one worker running. Here is my simple web app:



import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.mllib.classification.SVMModel;
import org.apache.spark.mllib.classification.SVMWithSGD;
import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.linalg.Vectors;
import org.apache.spark.mllib.regression.LabeledPoint;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;

import scala.Tuple2;

@RestController
public class SimpleApp implements Serializable {

    @RequestMapping("/simpleapp")
    public String clusterAnalyze() {
        SparkConf conf = new SparkConf()
                .setAppName("Simple Application")
                .setMaster("spark://myserver:7077")
                .set("spark.executor.memory", "6g")
                .set("spark.driver.allowMultipleContexts", "true")
                .set("spark.home", "/var/lib/spark-1.2.0-bin-hadoop2.4")
                .setJars(JavaSparkContext.jarOfClass(this.getClass()));
        JavaSparkContext sc = new JavaSparkContext(conf);
        try {
            // Build a small synthetic dataset of sparse 11-dimensional points.
            List<LabeledPoint> data = new ArrayList<LabeledPoint>();
            Random gen = new Random();
            for (int i = 0; i < 1000; i++) {
                Vector sv = Vectors.sparse(11,
                        new int[] { gen.nextInt(5), 5 + gen.nextInt(5) },
                        new double[] { gen.nextDouble(), gen.nextDouble() });
                LabeledPoint pos = new LabeledPoint((i % 2 == 0) ? 1.0 : 0.0, sv);
                data.add(pos);
            }
            // Turn the local list into a JavaRDD and split it into training/test sets.
            JavaRDD<LabeledPoint> distData = sc.parallelize(data);
            JavaRDD<LabeledPoint> training = distData.sample(false, 0.6, 11L);
            training.cache();
            JavaRDD<LabeledPoint> test = distData.subtract(training);

            // Run the training algorithm to build the model.
            int numIterations = 5;
            final SVMModel model = SVMWithSGD.train(training.rdd(), numIterations);

            // Clear the default threshold so predict() returns raw scores.
            model.clearThreshold();

            // Compute raw scores on the test set.
            JavaRDD<Tuple2<Object, Object>> scoreAndLabels = test.map(
                    new Function<LabeledPoint, Tuple2<Object, Object>>() {
                        public Tuple2<Object, Object> call(LabeledPoint p) {
                            Double score = model.predict(p.features());
                            return new Tuple2<Object, Object>(score, p.label());
                        }
                    });

            // Get evaluation metrics.
            BinaryClassificationMetrics metrics =
                    new BinaryClassificationMetrics(JavaRDD.toRDD(scoreAndLabels));
            double auROC = metrics.areaUnderROC();

            return String.valueOf(auROC);
        } catch (Exception e) {
            sc.stop();
            return e.getMessage();
        }
    }
}


I build a WAR file using Gradle and deploy it to Tomcat 7, but when I run this code I get the following error:



boot - 7325 DEBUG [http-bio-8888-exec-3] --- RequestResponseBodyMethodProcessor: Written [Job aborted due to stage failure: Task 1 in stage 10.0 failed 4 times, most recent failure: Lost task 1.3 in stage 10.0 (TID 26, 104.236.201.144): java.lang.ClassNotFoundException: main.java.server.SimpleApp$1
at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:274)
at org.apache.spark.serializer.JavaDeserializationStream$$anon$1.resolveClass(JavaSerializer.scala:59)
at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1612)
at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1517)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1771)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1990)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1915)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1798)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1990)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1915)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1798)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1990)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1915)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1798)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:370)
at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:62)
at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:87)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:60)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:56)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:196)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
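
The class it cannot find, main.java.server.SimpleApp$1, is the anonymous Function defined inside the controller, which suggests the executors never receive the web app's classes. One quick check (the logging below is hypothetical, not part of the app above) is to see what jarOfClass() actually resolves to when running under Tomcat, since an exploded WEB-INF/classes directory is not a JAR that executors can fetch:

// Hypothetical diagnostic, added inside clusterAnalyze() before building the
// SparkConf: print what jarOfClass() resolves to. Under Tomcat the classes
// usually load from an exploded WEB-INF/classes directory, in which case this
// array can be empty and setJars(...) ships nothing to the executors.
String[] jars = JavaSparkContext.jarOfClass(this.getClass());
System.out.println("jarOfClass resolved to: " + java.util.Arrays.toString(jars));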


I have searched a lot, and the answers all say that I should build a fat JAR and include it with the project, or that I should use .setJars(JavaSparkContext.jarOfClass(this.getClass())), which I am already doing. Also, I do not want to submit the app with spark-submit; it has to run inside the web app. Does anyone know how I should resolve this? Should I have both a WAR file and a JAR file of the project? If so, how should I use the JAR file?
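
For reference, the variant I am considering (unverified) is to have Gradle build a plain JAR of the application classes alongside the WAR and to pass that JAR's filesystem path to setJars explicitly, instead of relying on jarOfClass(). A minimal sketch, where the JAR path is a placeholder for wherever the build actually puts it:

// Hypothetical: hand Spark an explicit JAR of the app classes built next to
// the WAR. The path below is a placeholder; it only needs to exist on the
// host running Tomcat, since executors fetch jars from the driver.
SparkConf conf = new SparkConf()
        .setAppName("Simple Application")
        .setMaster("spark://myserver:7077")
        .setJars(new String[] {
                "/var/lib/tomcat7/webapps/myapp/WEB-INF/lib/simpleapp.jar"
        });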

