jeudi 19 février 2015

How to optimize my performances using FlatFileItemReader and Asynchronous Processors

I have a simple csv file with ~400,000 line(one column only) It takes me alot of time to read the records and process them


the processor validating records against couchbase


the writer - writing into remote topic Takes me around 30 mins. thats insane.


I read that flatfileItemreader is not thread safe. so my chunk value is 1.


I read the Asynchronous processing could assist. but I cant see any improvements.


Thats my code:



@Configuration
@EnableBatchProcessing
public class NotificationFileProcessUploadedFileJob {


@Value("${expected.snid.header}")
public String snidHeader;

@Value("${num.of.processing.chunks.per.file}")
public int numOfProcessingChunksPerFile;

@Autowired
private InfrastructureConfigurationConfig infrastructureConfigurationConfig;

private static final String OVERRIDDEN_BY_EXPRESSION = null;


@Inject
private JobBuilderFactory jobs;

@Inject
private StepBuilderFactory stepBuilderFactory;

@Inject
ExecutionContextPromotionListener executionContextPromotionListener;


@Bean
public Job processUploadedFileJob() throws Exception {
return this.jobs.get("processUploadedFileJob").start((processSnidUploadedFileStep())).build();

}

@Bean
public Step processSnidUploadedFileStep() {
return stepBuilderFactory.get("processSnidFileStep")
.<PushItemDTO, PushItemDTO>chunk(numOfProcessingChunksPerFile)
.reader(snidFileReader(OVERRIDDEN_BY_EXPRESSION))
.processor(asyncItemProcessor())
.writer(asyncItemWriter())
// .throttleLimit(20)
// .taskJobExecutor(infrastructureConfigurationConfig.taskJobExecutor())


// .faultTolerant()
// .skipLimit(10) //default is set to 0
// .skip(MySQLIntegrityConstraintViolationException.class)
.build();
}

@Inject
ItemWriter writer;

@Bean
public AsyncItemWriter asyncItemWriter() {
AsyncItemWriter asyncItemWriter=new AsyncItemWriter();
asyncItemWriter.setDelegate(writer);
return asyncItemWriter;
}


@Bean
@Scope(value = "step", proxyMode = ScopedProxyMode.INTERFACES)
public ItemStreamReader<PushItemDTO> snidFileReader(@Value("#{jobParameters[filePath]}") String filePath) {
FlatFileItemReader<PushItemDTO> itemReader = new FlatFileItemReader<PushItemDTO>();
itemReader.setLineMapper(snidLineMapper());
itemReader.setLinesToSkip(1);
itemReader.setResource(new FileSystemResource(filePath));
return itemReader;
}


@Bean
public AsyncItemProcessor asyncItemProcessor() {

AsyncItemProcessor<PushItemDTO, PushItemDTO> asyncItemProcessor = new AsyncItemProcessor();

asyncItemProcessor.setDelegate(processor(OVERRIDDEN_BY_EXPRESSION, OVERRIDDEN_BY_EXPRESSION, OVERRIDDEN_BY_EXPRESSION,
OVERRIDDEN_BY_EXPRESSION, OVERRIDDEN_BY_EXPRESSION, OVERRIDDEN_BY_EXPRESSION, OVERRIDDEN_BY_EXPRESSION));
asyncItemProcessor.setTaskExecutor(infrastructureConfigurationConfig.taskProcessingExecutor());

return asyncItemProcessor;

}

@Scope(value = "step", proxyMode = ScopedProxyMode.INTERFACES)
@Bean
public ItemProcessor<PushItemDTO, PushItemDTO> processor(@Value("#{jobParameters[pushMessage]}") String pushMessage,
@Value("#{jobParameters[jobId]}") String jobId,
@Value("#{jobParameters[taskId]}") String taskId,
@Value("#{jobParameters[refId]}") String refId,
@Value("#{jobParameters[url]}") String url,
@Value("#{jobParameters[targetType]}") String targetType,
@Value("#{jobParameters[gameType]}") String gameType) {
return new PushItemProcessor(pushMessage, jobId, taskId, refId, url, targetType, gameType);
}

@Bean
public LineMapper<PushItemDTO> snidLineMapper() {
DefaultLineMapper<PushItemDTO> lineMapper = new DefaultLineMapper<PushItemDTO>();
DelimitedLineTokenizer lineTokenizer = new DelimitedLineTokenizer();
lineTokenizer.setDelimiter(",");
lineTokenizer.setStrict(true);
lineTokenizer.setStrict(true);
String[] splittedHeader = snidHeader.split(",");
lineTokenizer.setNames(splittedHeader);
BeanWrapperFieldSetMapper<PushItemDTO> fieldSetMapper = new BeanWrapperFieldSetMapper<PushItemDTO>();
fieldSetMapper.setTargetType(PushItemDTO.class);

lineMapper.setLineTokenizer(lineTokenizer);
lineMapper.setFieldSetMapper(new PushItemFieldSetMapper());
return lineMapper;
}
}


@Bean
@Override
public SimpleAsyncTaskExecutor taskProcessingExecutor() {
SimpleAsyncTaskExecutor simpleAsyncTaskExecutor = new SimpleAsyncTaskExecutor();
simpleAsyncTaskExecutor.setConcurrencyLimit(300);
return simpleAsyncTaskExecutor;
}


How do you think I could improve the processing performances and make them faster? thank you


Aucun commentaire:

Enregistrer un commentaire