Java writing code -- compare two CSV files in Java
Q. How will you go about writing code for a scenario where you need to compare two CSV files? Assume that both CSV files have been converted to "List<String[]>".
A. You can use a CSV framework like OpenCSV to convert a CSV file to a List<String[]>.
Possible scenarios are
One approach to doing this would be to use Java's Set interface (e.g. a HashSet):
// using FileUtils to read in the files. HashSet<String[]> target = new HashSet<String[]>(); //...populate target via OpenCSV HashSet<String[]> generated = new HashSet<String[]>(); //...populate generated via OpenCSV generated.removeAll(target); // generated now contains only the lines which are not in target
The above solution would not work if duplicates are allowed. Here is one possible solution for when there are duplicates.
Firstly, write down the logical steps before you start coding.
1. Compare the number of items in both target and generated lists.
2. If the number of items is the same, compare each item.
3. If the number of items or the contents of any individual item differ, the contents are not the same, so identify the contents that differ.
3.1. To identify the contents that differ, both the target and generated lists need to be sorted.
3.2. Loop through and compare each item from both the lists. If either the target or the generated list has been fully processed, exit the loop.
3.3. If there are any target or generated items not processed yet, process them in a new loop.
3.2.1 Inside the loop 3.2, there are 3 possible outcomes.
A. The item is in target but not in generated.
B. The item is in both target and generated.
C. The item is in generated but not in target.
The compareTo(..) method is used as it will return zero, a negative value, or a positive value, meaning the contents are equal, the target is less than the generated, or the target is greater than the generated, respectively.
package com.myapp.compare; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.List; import org.springframework.stereotype.Component; @Component(value = "simpleCSVCompare") public class SimpleCSVCompareImpl implements SimpleCSVCompare { //compare target and generated CSV lines public CSVCompareResult compareCSVLines(List<String[]> targetLines, List<String[]> generatedLines, CSVCompareResult result){ //Step1: Number of lines differ if(targetLines.size() != generatedLines.size()){ result.addMismatchingLines("Line sizes don't match: " + " target=" + targetLines.size() + ", generated=" + generatedLines.size()); result.setStatus(CSVCompareResult.FileCompareStatus.UNMATCHED); } //Step 2: Contents differ if(targetLines.size() == generatedLines.size()){ for (int i = 0; i < targetLines.size(); i++) { String[] targetLine = targetLines.get(i); String[] genLine = generatedLines.get(i); if(!Arrays.deepEquals(targetLine, genLine)){ result.addMismatchingLines("Line contents don't match."); result.setStatus(CSVCompareResult.FileCompareStatus.UNMATCHED); break; } } } //Step 3: Identify the differing lines if(CSVCompareResult.FileCompareStatus.UNMATCHED == result.getStatus()){ sortedList(targetLines); sortedList(generatedLines); evaluateCSVLineDifferences(targetLines,generatedLines,result); } return result; } public CSVCompareResult evaluateCSVLineDifferences(List<String[]> targetLines, List<String[]> generatedLines, CSVCompareResult result) { result.setNoOfGeneratedLines(generatedLines.size()); result.setNoOfTargetLines(targetLines.size()); int genIndex = 0; int targIndex = 0; String[] lineTarget = targetLines.get(targIndex); String[] lineGen = generatedLines.get(genIndex); boolean targetDone = false; boolean generatedDone = false; while (!targetDone && !generatedDone) { //target line is less than the generated line if (Arrays.deepToString(lineTarget).compareTo(Arrays.deepToString(lineGen)) < 0) { while 
(Arrays.deepToString(lineTarget).compareTo(Arrays.deepToString(lineGen)) < 0 && !targetDone) { result.addMismatchingLines("TARGET:" + Arrays.deepToString(lineTarget)); if (targIndex < targetLines.size() - 1) { lineTarget = targetLines.get(++targIndex); } else { targetDone = true; } } //target and generated lines are same } else if (Arrays.deepToString(lineTarget).compareTo(Arrays.deepToString(lineGen)) == 0) { if (targIndex < targetLines.size() - 1) { lineTarget = targetLines.get(++targIndex); } else { targetDone = true; } if (genIndex < generatedLines.size() - 1) { lineGen = generatedLines.get(++genIndex); } else { generatedDone = true; } //target line is greater than the generated line } else if (Arrays.deepToString(lineTarget).compareTo(Arrays.deepToString(lineGen)) > 0) { while (Arrays.deepToString(lineTarget).compareTo(Arrays.deepToString(lineGen)) > 0 && !generatedDone) { result.addMismatchingLines("GENERATED:" + Arrays.deepToString(lineGen)); if (genIndex < generatedLines.size() - 1) { lineGen = generatedLines.get(++genIndex); } else { generatedDone = true; } } } } //process any target lines not processed while (!targetDone) { result.addMismatchingLines("TARGET:" + Arrays.deepToString(lineTarget)); if (targIndex < targetLines.size() - 1) { lineTarget = targetLines.get(++targIndex); } else { targetDone = true; } } //process any generated lines not processed while (!generatedDone) { result.addMismatchingLines("GENERATED:" + Arrays.deepToString(lineGen)); if (genIndex < generatedLines.size() - 1) { lineGen = generatedLines.get(++genIndex); } else { generatedDone = true; } } return result; } public void sortedList(List<String[]> input){ Collections.sort(input, new Comparator<String[]>() { @Override public int compare(String[] o1, String[] o2) { return Arrays.deepToString(o1).compareTo(Arrays.deepToString(o2)); } }); } public static void main(String[] args) { String[] targA1 = { "a1" }; String[] genA1 = { "a1" }; String[] targA2 = { "a2" }; String[] genA2 = { 
"a2" }; String[] targA3 = { "a3" }; String[] genA3 = { "a3" }; String[] targA4 = { "a4" }; String[] genA4 = { "a4" }; String[] targA5 = { "a5" }; String[] genA6 = { "a6" }; List<String[]> targetLines = new ArrayList<String[]>(); List<String[]> generatedLines = new ArrayList<String[]>(); targetLines.add(targA1); targetLines.add(targA2); targetLines.add(targA2); targetLines.add(targA3); targetLines.add(targA4); targetLines.add(targA5); generatedLines.add(genA1); generatedLines.add(genA2); generatedLines.add(genA3); generatedLines.add(genA4); generatedLines.add(genA6); CSVCompareResult result = new CSVCompareResult(); new SimpleCSVCompareImpl().evaluateCSVLineDifferences(targetLines, generatedLines, result); System.out.println(result.getMismatchingLines()); } }
The results can be added to a value object like
package com.myapp.compare; import java.util.ArrayList; import java.util.List; public class CSVCompareResult { public enum FileCompareStatus { MATCHED, UNMATCHED }; private String generatedFileName; private String targetFileName; private int noOfTargetLines; private int noOfGeneratedLines; private FileCompareStatus status = FileCompareStatus.MATCHED; private List<String> mismatchingLines = new ArrayList<String>(20); public String getGeneratedFileName() { return generatedFileName; } public void setGeneratedFileName(String generatedFileName) { this.generatedFileName = generatedFileName; } public String getTargetFileName() { return targetFileName; } public void setTargetFileName(String targetFileName) { this.targetFileName = targetFileName; } public int getNoOfTargetLines() { return noOfTargetLines; } public void setNoOfTargetLines(int noOfTargetLines) { this.noOfTargetLines = noOfTargetLines; } public int getNoOfGeneratedLines() { return noOfGeneratedLines; } public void setNoOfGeneratedLines(int noOfGeneratedLines) { this.noOfGeneratedLines = noOfGeneratedLines; } public List<String> getMismatchingLines() { return mismatchingLines; } public void setMismatchingLines(List<String> mismatchingLineNumbers) { this.mismatchingLines = mismatchingLineNumbers; } public void addMismatchingLines(String lineNumber) { mismatchingLines.add(lineNumber); } public FileCompareStatus getStatus() { return status; } public void setStatus(FileCompareStatus status) { this.status = status; } public String outputResultsAsString(){ StringBuilder sb = new StringBuilder(); sb.append("Files Compared: " + " target=" + targetFileName + ", generated=" + generatedFileName); sb.append("\n"); sb.append("Status:" + status); sb.append("\n"); List<String> mismatchingLines = getMismatchingLines(); for (String msg : mismatchingLines) { sb.append(msg); sb.append("\n"); } return sb.toString(); } }
2 Comments:
From where you are inputting the csv files? And Where you printing the output. It is bit confusing.. Could you please help me out to understand this example. If possible, could you please provide me an example to compare two CSV files with simple java program.
Thanks in Advance.
Here is an open source program that compares CSV files; maybe this helps:
http://sourceforge.net/projects/csvcomparator/
Post a Comment
Subscribe to Post Comments [Atom]
<< Home