Here are examples of the Java API class org.apache.spark.api.java.JavaSparkContext, taken from open source projects. By voting up you can indicate which examples are most useful and appropriate.
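The examples below obtain a JavaSparkContext either by constructing one directly from a SparkConf or by wrapping the SparkContext of an existing SparkSession (via JavaSparkContext.fromSparkContext or the JavaSparkContext(SparkContext) constructor). A minimal self-contained sketch of both patterns; the app name and local master are placeholder values:
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
public class JavaSparkContextSetup {
    public static void main(String[] args) {
        // Option 1: construct directly from a SparkConf
        SparkConf conf = new SparkConf().setAppName("example").setMaster("local[*]");
        JavaSparkContext sc = new JavaSparkContext(conf);
        sc.stop();
        // Option 2: wrap the SparkContext of an existing SparkSession
        SparkSession spark = SparkSession.builder().appName("example").master("local[*]").getOrCreate();
        JavaSparkContext jsc = JavaSparkContext.fromSparkContext(spark.sparkContext());
        spark.stop();
    }
}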
830 Examples
Source : PageOneStepConvertRateSpark.java
with GNU General Public License v3.0
from wlhbdp
/**
* Page-split generation and matching algorithm
* @param sc
* @param sessionid2actionsRDD
* @param taskParam
* @return
*/
private static JavaPairRDD<String, Integer> generateAndMatchPageSplit(JavaSparkContext sc, JavaPairRDD<String, Iterable<Row>> sessionid2actionsRDD, JSONObject taskParam) {
String targetPageFlow = ParamUtils.getParam(taskParam, Constants.PARAM_TARGET_PAGE_FLOW);
final Broadcast<String> targetPageFlowBroadcast = sc.broadcast(targetPageFlow);
return sessionid2actionsRDD.flatMapToPair(new PairFlatMapFunction<Tuple2<String, Iterable<Row>>, String, Integer>() {
private static final long serialVersionUID = 1L;
@Override
public Iterator<Tuple2<String, Integer>> call(Tuple2<String, Iterable<Row>> tuple) throws Exception {
// the list of results to return
List<Tuple2<String, Integer>> list = new ArrayList<Tuple2<String, Integer>>();
// get an iterator over the current session's actions
Iterator<Row> iterator = tuple._2.iterator();
// get the page flow specified by the user
// e.g. the user-specified page flow: 1,2,3,4,5,6,7
// what is the conversion rate of 1->2? of 2->3?
String[] targetPages = targetPageFlowBroadcast.value().split(",");
// the session's actions we get here are unordered by default
// normally we would want the data sorted by time
// but by default it is not sorted
// so the first step is to sort the session's actions by time
// counter-example:
// e.g. 3->5->4->10->7
// should become 3->4->5->7->10
// sort
List<Row> rows = new ArrayList<Row>();
while (iterator.hasNext()) {
rows.add(iterator.next());
}
Collections.sort(rows, new Comparator<Row>() {
@Override
public int compare(Row o1, Row o2) {
String actionTime1 = o1.getString(4);
String actionTime2 = o2.getString(4);
Date date1 = DateUtils.parseTime(actionTime1);
Date date2 = DateUtils.parseTime(actionTime2);
return date1.compareTo(date2);
}
});
// generate page splits and match them against the page flow
Long lastPageId = null;
for (Row row : rows) {
Long pageid = row.getLong(3);
if (lastPageId == null) {
lastPageId = pageid;
continue;
}
// generate a page split
// e.g. pages 3,5,2,1,8,9
// lastPageId=3
// current page 5 -> split "3_5"
String pageSplit = lastPageId + "_" + pageid;
// check whether this split occurs in the user-specified page flow
for (int i = 1; i < targetPages.length; i++) {
// e.g. the user-specified page flow is 3,2,5,8,1
// start iterating from index 1, i.e. from the second page
// 3_2
String targetPageSplit = targetPages[i - 1] + "_" + targetPages[i];
if (pageSplit.equals(targetPageSplit)) {
list.add(new Tuple2<String, Integer>(pageSplit, 1));
break;
}
}
lastPageId = pageid;
}
return list.iterator();
}
});
}
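Since the anonymous PairFlatMapFunction above consists of a single call method, the same page-split matching can be expressed more compactly with a Java 8 lambda. A sketch of the equivalent flatMapToPair call (same logic as above, assuming a Spark 2.x Java API where call returns an Iterator):
return sessionid2actionsRDD.flatMapToPair(tuple -> {
    List<Tuple2<String, Integer>> list = new ArrayList<>();
    String[] targetPages = targetPageFlowBroadcast.value().split(",");
    // collect and sort the session's actions by action time (column 4)
    List<Row> rows = new ArrayList<>();
    tuple._2.forEach(rows::add);
    rows.sort(Comparator.comparing((Row r) -> DateUtils.parseTime(r.getString(4))));
    Long lastPageId = null;
    for (Row row : rows) {
        long pageid = row.getLong(3);
        if (lastPageId != null) {
            // build the page split and keep it only if it occurs in the target page flow
            String pageSplit = lastPageId + "_" + pageid;
            for (int i = 1; i < targetPages.length; i++) {
                if (pageSplit.equals(targetPages[i - 1] + "_" + targetPages[i])) {
                    list.add(new Tuple2<>(pageSplit, 1));
                    break;
                }
            }
        }
        lastPageId = pageid;
    }
    return list.iterator();
});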
Source : BlocksFromEntityIndexTest.java
with Apache License 2.0
from vefthym
/**
* @author vefthym
*/
public class BlocksFromEntityIndexTest {
SparkSession spark;
JavaSparkContext jsc;
public BlocksFromEntityIndexTest() {
}
@BeforeClass
public static void setUpClass() {
}
@AfterClass
public static void tearDownClass() {
}
@Before
public void setUp() {
// only for local mode
System.setProperty("hadoop.home.dir", "C:\\Users\\VASILIS\\Doreplacedents\\hadoop_home");
spark = SparkSession.builder().appName("test").config("spark.sql.warehouse.dir", "/file:/tmp").config("spark.executor.instances", 1).config("spark.executor.cores", 1).config("spark.executor.memory", "1G").config("spark.driver.maxResultSize", "1g").config("spark.master", "local").getOrCreate();
jsc = JavaSparkContext.fromSparkContext(spark.sparkContext());
}
@After
public void tearDown() {
}
/**
* Test of run method, of class BlocksFromEntityIndex.
*/
@Test
public void testRun() {
System.out.println("blocks from enreplacedy index");
List<String> dummyBlocks = new ArrayList<>();
dummyBlocks.add("0\t1#2#3#4#5#;-1#-2#-3#-4#-5#");
dummyBlocks.add("1\t3#4#5#;-1#-5#");
dummyBlocks.add("2\t5#;-5#");
dummyBlocks.add("3\t5#;");
JavaRDD<String> blockingInput = jsc.parallelize(dummyBlocks);
LongAccumulator BLOCK_ASSIGNMENTS = jsc.sc().longAccumulator();
BlockFilteringAdvanced blockFiltering = new BlockFilteringAdvanced();
JavaPairRDD<Integer, IntArrayList> entityIndex = blockFiltering.run(blockingInput, BLOCK_ASSIGNMENTS);
BlocksFromEntityIndex instance = new BlocksFromEntityIndex();
LongAccumulator cleanBlocksAccum = jsc.sc().longAccumulator();
LongAccumulator numComparisons = jsc.sc().longAccumulator();
JavaPairRDD<Integer, IntArrayList> result = instance.run(entityIndex, cleanBlocksAccum, numComparisons);
List<Tuple2<Integer, IntArrayList>> expResult = new ArrayList<>();
expResult.add(new Tuple2<>(0, new IntArrayList(new int[] { 1, 2, 3, 4, -1, -2, -3, -4 })));
expResult.add(new Tuple2<>(1, new IntArrayList(new int[] { 3, 4, 5, -1, -5 })));
expResult.add(new Tuple2<>(2, new IntArrayList(new int[] { 5, -5 })));
JavaPairRDD<Integer, IntArrayList> expResultRDD = jsc.parallelizePairs(expResult);
List<Tuple2<Integer, IntArrayList>> resultList = result.collect();
List<Tuple2<Integer, IntArrayList>> expResultList = expResultRDD.collect();
expResultList.stream().forEach(listItem -> Collections.sort(listItem._2()));
resultList.stream().forEach(listItem -> Collections.sort(listItem._2()));
System.out.println("Result: " + Arrays.toString(resultList.toArray()));
System.out.println("Expect: " + Arrays.toString(expResultList.toArray()));
assertEquals((long) cleanBlocksAccum.value(), 3);
assertEquals((long) numComparisons.value(), 23);
assertEquals(new HashSet<>(resultList), new HashSet<>(expResultList));
// assertEquals(expResultRDD, result);
}
}
Source : BlockFilteringAdvancedTest.java
with Apache License 2.0
from vefthym
/**
* @author vefthym
*/
public class BlockFilteringAdvancedTest {
SparkSession spark;
JavaSparkContext jsc;
public BlockFilteringAdvancedTest() {
}
@BeforeClass
public static void setUpClass() {
}
@AfterClass
public static void tearDownClass() {
}
@Before
public void setUp() {
// only for local mode
System.setProperty("hadoop.home.dir", "C:\\Users\\VASILIS\\Doreplacedents\\hadoop_home");
spark = SparkSession.builder().appName("test").config("spark.sql.warehouse.dir", "/file:/tmp").config("spark.executor.instances", 1).config("spark.executor.cores", 1).config("spark.executor.memory", "1G").config("spark.driver.maxResultSize", "1g").config("spark.master", "local").getOrCreate();
jsc = JavaSparkContext.fromSparkContext(spark.sparkContext());
}
@After
public void tearDown() {
}
/**
* Test of run method, of class BlockFilteringAdvanced.
*/
@Test
public void testRun() {
System.out.println("getEnreplacedyBlocksAdvanced");
List<String> dummyBlocks = new ArrayList<>();
dummyBlocks.add("0\t1#2#3#4#5#;-1#-2#-3#-4#-5#");
dummyBlocks.add("1\t3#4#5#;-1#-5#");
dummyBlocks.add("2\t5#;-5#");
dummyBlocks.add("3\t5#;");
JavaRDD<String> blockingInput = jsc.parallelize(dummyBlocks);
LongAccumulator BLOCK_ASSIGNMENTS = jsc.sc().longAccumulator();
BlockFilteringAdvanced instance = new BlockFilteringAdvanced();
JavaPairRDD<Integer, IntArrayList> result = instance.run(blockingInput, BLOCK_ASSIGNMENTS);
List<Tuple2<Integer, IntArrayList>> expResult = new ArrayList<>();
expResult.add(new Tuple2<>(1, new IntArrayList(new int[] { 0 })));
expResult.add(new Tuple2<>(2, new IntArrayList(new int[] { 0 })));
expResult.add(new Tuple2<>(3, new IntArrayList(new int[] { 1, 0 })));
expResult.add(new Tuple2<>(4, new IntArrayList(new int[] { 1, 0 })));
expResult.add(new Tuple2<>(5, new IntArrayList(new int[] { 2, 1 })));
expResult.add(new Tuple2<>(-1, new IntArrayList(new int[] { 1, 0 })));
expResult.add(new Tuple2<>(-2, new IntArrayList(new int[] { 0 })));
expResult.add(new Tuple2<>(-3, new IntArrayList(new int[] { 0 })));
expResult.add(new Tuple2<>(-4, new IntArrayList(new int[] { 0 })));
expResult.add(new Tuple2<>(-5, new IntArrayList(new int[] { 2, 1 })));
JavaPairRDD<Integer, IntArrayList> expResultRDD = jsc.parallelizePairs(expResult);
List<Tuple2<Integer, IntArrayList>> resultList = result.collect();
List<Tuple2<Integer, IntArrayList>> expResultList = expResultRDD.collect();
System.out.println("Result: " + Arrays.toString(resultList.toArray()));
System.out.println("Expect: " + Arrays.toString(expResultList.toArray()));
assertEquals(new HashSet<>(resultList), new HashSet<>(expResultList));
assertEquals((long) BLOCK_ASSIGNMENTS.value(), 15);
}
/**
* Test of parseBlockCollection method, of class BlockFilteringAdvanced.
*/
@Test
public void testParseBlockCollection() {
System.out.println("parseBlockCollection");
List<String> dummyBlocks = new ArrayList<>();
dummyBlocks.add("0\t1#2#3#4#5#;-1#-2#-3#-4#-5#");
dummyBlocks.add("1\t3#4#5#;-1#-5#");
dummyBlocks.add("2\t5#;-5#");
dummyBlocks.add("3\t5#;");
JavaRDD<String> blockingInput = jsc.parallelize(dummyBlocks);
BlockFilteringAdvanced instance = new BlockFilteringAdvanced();
JavaPairRDD<Integer, IntArrayList> result = instance.parseBlockCollection(blockingInput);
List<Tuple2<Integer, IntArrayList>> dummyBlocksParsed = new ArrayList<>();
dummyBlocksParsed.add(new Tuple2<>(0, new IntArrayList(new int[] { 1, 2, 3, 4, 5, -1, -2, -3, -4, -5 })));
dummyBlocksParsed.add(new Tuple2<>(1, new IntArrayList(new int[] { 3, 4, 5, -1, -5 })));
dummyBlocksParsed.add(new Tuple2<>(2, new IntArrayList(new int[] { 5, -5 })));
dummyBlocksParsed.add(new Tuple2<>(3, new IntArrayList(new int[] { 5 })));
JavaPairRDD<Integer, IntArrayList> expResult = jsc.parallelizePairs(dummyBlocksParsed);
List<Tuple2<Integer, IntArrayList>> resultList = result.collect();
List<Tuple2<Integer, IntArrayList>> expResultList = expResult.collect();
System.out.println("Result: " + Arrays.toString(resultList.toArray()));
System.out.println("Expect: " + Arrays.toString(expResultList.toArray()));
assertEquals(resultList, expResultList);
}
/**
* Test of getEntityBlocksAdvanced method, of class BlockFilteringAdvanced.
* @throws java.lang.IllegalAccessException
* @throws java.lang.reflect.InvocationTargetException
* @throws java.lang.NoSuchMethodException
*/
@Test
public void testGetEntityBlocksAdvanced() throws IllegalAccessException, IllegalArgumentException, InvocationTargetException, NoSuchMethodException {
System.out.println("getEntityBlocksAdvanced");
List<String> dummyBlocks = new ArrayList<>();
dummyBlocks.add("0\t1#2#3#4#5#;-1#-2#-3#-4#-5#");
dummyBlocks.add("1\t3#4#5#;-1#-5#");
dummyBlocks.add("2\t5#;-5#");
dummyBlocks.add("3\t5#;");
JavaRDD<String> blockingInput = jsc.parallelize(dummyBlocks);
BlockFilteringAdvanced instance = new BlockFilteringAdvanced();
JavaPairRDD<Integer, IntArrayList> parsedBlocks = instance.parseBlockCollection(blockingInput);
Method method = BlockFilteringAdvanced.class.getDeclaredMethod("getEntityBlocksAdvanced", JavaPairRDD.class);
method.setAccessible(true);
JavaPairRDD<Integer, Tuple2<Integer, Integer>> result = (JavaPairRDD<Integer, Tuple2<Integer, Integer>>) method.invoke(instance, parsedBlocks);
List<Tuple2<Integer, Tuple2<Integer, Integer>>> expResult = new ArrayList<>();
expResult.add(new Tuple2<>(1, new Tuple2<>(0, 5)));
expResult.add(new Tuple2<>(2, new Tuple2<>(0, 5)));
expResult.add(new Tuple2<>(3, new Tuple2<>(0, 5)));
expResult.add(new Tuple2<>(4, new Tuple2<>(0, 5)));
expResult.add(new Tuple2<>(5, new Tuple2<>(0, 5)));
expResult.add(new Tuple2<>(-1, new Tuple2<>(0, 5)));
expResult.add(new Tuple2<>(-2, new Tuple2<>(0, 5)));
expResult.add(new Tuple2<>(-3, new Tuple2<>(0, 5)));
expResult.add(new Tuple2<>(-4, new Tuple2<>(0, 5)));
expResult.add(new Tuple2<>(-5, new Tuple2<>(0, 5)));
expResult.add(new Tuple2<>(3, new Tuple2<>(1, 3)));
expResult.add(new Tuple2<>(4, new Tuple2<>(1, 3)));
expResult.add(new Tuple2<>(5, new Tuple2<>(1, 3)));
expResult.add(new Tuple2<>(-1, new Tuple2<>(1, 3)));
expResult.add(new Tuple2<>(-5, new Tuple2<>(1, 3)));
expResult.add(new Tuple2<>(5, new Tuple2<>(2, 1)));
expResult.add(new Tuple2<>(-5, new Tuple2<>(2, 1)));
// expResult.add(new Tuple2<>(5, new Tuple2<>(3,0))); //null result
JavaPairRDD<Integer, Tuple2<Integer, Integer>> expResultRDD = jsc.parallelizePairs(expResult);
List<Tuple2<Integer, Tuple2<Integer, Integer>>> resultList = result.collect();
List<Tuple2<Integer, Tuple2<Integer, Integer>>> expResultList = expResultRDD.collect();
System.out.println("Result: " + Arrays.toString(resultList.toArray()));
System.out.println("Expect: " + Arrays.toString(expResultList.toArray()));
assertEquals(new HashSet<>(resultList), new HashSet<>(expResultList));
}
/**
* Test of getEntityIndex method, of class BlockFilteringAdvanced.
*/
@Test
public void testGetEntityIndex() throws NoSuchMethodException, IllegalAccessException, IllegalArgumentException, InvocationTargetException {
System.out.println("getEntityIndex");
List<String> dummyBlocks = new ArrayList<>();
dummyBlocks.add("0\t1#2#3#4#5#;-1#-2#-3#-4#-5#");
dummyBlocks.add("1\t3#4#5#;-1#-5#");
dummyBlocks.add("2\t5#;-5#");
dummyBlocks.add("3\t5#;");
JavaRDD<String> blockingInput = jsc.parallelize(dummyBlocks);
BlockFilteringAdvanced instance = new BlockFilteringAdvanced();
JavaPairRDD<Integer, IntArrayList> parsedBlocks = instance.parseBlockCollection(blockingInput);
Method method1 = BlockFilteringAdvanced.class.getDeclaredMethod("getEntityBlocksAdvanced", JavaPairRDD.class);
method1.setAccessible(true);
JavaPairRDD<Integer, Tuple2<Integer, Integer>> entityBlocks = (JavaPairRDD<Integer, Tuple2<Integer, Integer>>) method1.invoke(instance, parsedBlocks);
Method method2 = BlockFilteringAdvanced.class.getDeclaredMethod("getEntityIndex", JavaPairRDD.class, LongAccumulator.class);
method2.setAccessible(true);
LongAccumulator BLOCK_ASSIGNMENTS = jsc.sc().longAccumulator();
JavaPairRDD<Integer, IntArrayList> result = (JavaPairRDD<Integer, IntArrayList>) method2.invoke(instance, entityBlocks, BLOCK_ASSIGNMENTS);
// final int MAX_BLOCKS = ((Double)Math.floor(3*numBlocks/4+1)).intValue(); //|_ 3|Bi|/4+1 _| //preprocessing
List<Tuple2<Integer, IntArrayList>> expResult = new ArrayList<>();
expResult.add(new Tuple2<>(-2, new IntArrayList(new int[] { 0 })));
expResult.add(new Tuple2<>(4, new IntArrayList(new int[] { 1, 0 })));
expResult.add(new Tuple2<>(-1, new IntArrayList(new int[] { 1, 0 })));
expResult.add(new Tuple2<>(-5, new IntArrayList(new int[] { 2, 1 })));
expResult.add(new Tuple2<>(-4, new IntArrayList(new int[] { 0 })));
expResult.add(new Tuple2<>(1, new IntArrayList(new int[] { 0 })));
expResult.add(new Tuple2<>(-3, new IntArrayList(new int[] { 0 })));
expResult.add(new Tuple2<>(3, new IntArrayList(new int[] { 1, 0 })));
expResult.add(new Tuple2<>(5, new IntArrayList(new int[] { 2, 1 })));
expResult.add(new Tuple2<>(2, new IntArrayList(new int[] { 0 })));
JavaPairRDD<Integer, IntArrayList> expResultRDD = jsc.parallelizePairs(expResult);
List<Tuple2<Integer, IntArrayList>> resultList = result.collect();
List<Tuple2<Integer, IntArrayList>> expResultList = expResultRDD.collect();
System.out.println("Result: " + Arrays.toString(resultList.toArray()));
System.out.println("Expect: " + Arrays.toString(expResultList.toArray()));
assertEquals(new HashSet<>(resultList), new HashSet<>(expResultList));
assertEquals((long) BLOCK_ASSIGNMENTS.value(), 15);
}
}
Source : EntityBasedCNPMapPhaseTest.java
with Apache License 2.0
from vefthym
/**
* @author vefthym
*/
public class EntityBasedCNPMapPhaseTest {
SparkSession spark;
JavaSparkContext jsc;
public EntityBasedCNPMapPhaseTest() {
}
@BeforeClass
public static void setUpClass() {
}
@AfterClass
public static void tearDownClass() {
}
@Before
public void setUp() {
// only for local mode
System.setProperty("hadoop.home.dir", "C:\\Users\\VASILIS\\Doreplacedents\\hadoop_home");
spark = SparkSession.builder().appName("test").config("spark.sql.warehouse.dir", "/file:/tmp").config("spark.executor.instances", 1).config("spark.executor.cores", 1).config("spark.executor.memory", "1G").config("spark.driver.maxResultSize", "1g").config("spark.master", "local").getOrCreate();
jsc = JavaSparkContext.fromSparkContext(spark.sparkContext());
}
@After
public void tearDown() {
}
/**
* Test of getMapOutput method, of class EntityBasedCNPMapPhase.
*/
/*
@Test
public void testGetMapOutput() {
System.out.println("getMapOutput");
JavaPairRDD<Integer, IntArrayList> blocksFromEI = null;
JavaPairRDD<Integer, IntArrayList> expResult = null;
JavaPairRDD<Integer, IntArrayList> result = CNPMapPhase.getMapOutput(blocksFromEI);
assertEquals(expResult, result);
// TODO review the generated test code and remove the default call to fail.
fail("The test case is a prototype.");
}*/
/**
* Test of getMapOutputWJS method, of class CNPMapPhase.
*/
@Test
public void testGetMapOutputWJS() {
System.out.println("getMapOutputWJS");
System.out.println("blocks from enreplacedy index");
List<String> dummyBlocks = new ArrayList<>();
dummyBlocks.add("0\t1#2#3#4#5#;-1#-2#-3#-4#-5#");
dummyBlocks.add("1\t3#4#5#;-1#-5#");
dummyBlocks.add("2\t5#;-5#");
dummyBlocks.add("3\t5#;");
JavaRDD<String> blockingInput = jsc.parallelize(dummyBlocks);
LongAccumulator BLOCK_ASSIGNMENTS = jsc.sc().longAccumulator();
BlockFilteringAdvanced blockFiltering = new BlockFilteringAdvanced();
JavaPairRDD<Integer, IntArrayList> entityIndex = blockFiltering.run(blockingInput, BLOCK_ASSIGNMENTS);
BlocksFromEntityIndex bfei = new BlocksFromEntityIndex();
LongAccumulator cleanBlocksAccum = jsc.sc().longAccumulator();
LongAccumulator numComparisons = jsc.sc().longAccumulator();
JavaPairRDD<Integer, IntArrayList> filteredBlocks = bfei.run(entityIndex, cleanBlocksAccum, numComparisons);
List<Tuple2<Integer, IntArrayList>> tweakedBlocks = new ArrayList<>(filteredBlocks.collect());
// this should not alter the results
tweakedBlocks.add(new Tuple2<>(-1, new IntArrayList(new int[] { -100 })));
filteredBlocks = jsc.parallelizePairs(tweakedBlocks);
JavaPairRDD<Integer, IntArrayList> result = CNPMapPhase.getMapOutputWJS(filteredBlocks);
List<Tuple2<Integer, IntArrayList>> expResult = new ArrayList<>();
expResult.add(new Tuple2<>(4, new IntArrayList(new int[] { 4, -2, -1, -4, -3 })));
expResult.add(new Tuple2<>(1, new IntArrayList(new int[] { 4, -2, -1, -4, -3 })));
expResult.add(new Tuple2<>(3, new IntArrayList(new int[] { 4, -2, -1, -4, -3 })));
expResult.add(new Tuple2<>(2, new IntArrayList(new int[] { 4, -2, -1, -4, -3 })));
expResult.add(new Tuple2<>(-2, new IntArrayList(new int[] { 4, 4, 1, 3, 2 })));
expResult.add(new Tuple2<>(-1, new IntArrayList(new int[] { 4, 4, 1, 3, 2 })));
expResult.add(new Tuple2<>(-4, new IntArrayList(new int[] { 4, 4, 1, 3, 2 })));
expResult.add(new Tuple2<>(-3, new IntArrayList(new int[] { 4, 4, 1, 3, 2 })));
expResult.add(new Tuple2<>(4, new IntArrayList(new int[] { 3, -1, -5 })));
expResult.add(new Tuple2<>(3, new IntArrayList(new int[] { 3, -1, -5 })));
expResult.add(new Tuple2<>(5, new IntArrayList(new int[] { 3, -1, -5 })));
expResult.add(new Tuple2<>(-1, new IntArrayList(new int[] { 2, 4, 3, 5 })));
expResult.add(new Tuple2<>(-5, new IntArrayList(new int[] { 2, 4, 3, 5 })));
expResult.add(new Tuple2<>(5, new IntArrayList(new int[] { 1, -5 })));
expResult.add(new Tuple2<>(-5, new IntArrayList(new int[] { 1, 5 })));
JavaPairRDD<Integer, IntArrayList> expResultRDD = jsc.parallelizePairs(expResult);
List<Tuple2<Integer, IntArrayList>> resultList = result.collect();
List<Tuple2<Integer, IntArrayList>> expResultList = expResultRDD.collect();
// expResultList.stream().forEach(listItem -> Collections.sort(listItem._2()));
// resultList.stream().forEach(listItem -> Collections.sort(listItem._2()));
System.out.println("Result: " + Arrays.toString(resultList.toArray()));
System.out.println("Expect: " + Arrays.toString(expResultList.toArray()));
assertEquals(new HashSet<>(resultList), new HashSet<>(expResultList));
}
}
Source : EvaluateLabelMatchingResults.java
with Apache License 2.0
from vefthym
public static void main(String[] args) {
String tmpPath;
String master;
String inputTriples1, inputTriples2, entityIds1, entityIds2;
String resultsPath, groundTruthPath;
if (args.length == 0) {
// only for local mode
System.setProperty("hadoop.home.dir", "C:\\Users\\VASILIS\\Doreplacedents\\hadoop_home");
tmpPath = "/file:C:\\tmp";
master = "local[2]";
inputTriples1 = "";
inputTriples2 = "";
enreplacedyIds1 = "";
enreplacedyIds2 = "";
resultsPath = "/file:C:\\Users\\VASILIS\\Doreplacedents\\OAEI_Datasets\\exportedBlocks\\testInput";
groundTruthPath = "";
} else if (args.length == 5) {
tmpPath = "/file:/tmp";
// master = "spark://master:7077";
inputTriples1 = args[0];
inputTriples2 = args[1];
entityIds1 = args[2];
entityIds2 = args[3];
groundTruthPath = args[4];
} else {
System.out.println("You can run match evaluation with the following arguments:" + "0: inputTriples1" + "1: inputTriples2" + "2: enreplacedyIds1" + "3: enreplacedyIds2" + "4: ground truth path");
return;
}
String appName = "Evaluation of label matching";
SparkSession spark = Utils.setUpSpark(appName, 288, 8, 3, tmpPath);
int PARALLELISM = spark.sparkContext().getConf().getInt("spark.default.parallelism", 144);
JavaSparkContext jsc = JavaSparkContext.fromSparkContext(spark.sparkContext());
// //////////////////////
// start the processing//
// //////////////////////
System.out.println("Starting the evaluation...");
// YAGO-IMDb
Set<String> labelAtts1 = new HashSet<>(Arrays.asList("rdfs:label", "label", "skos:prefLabel"));
Set<String> labelAtts2 = labelAtts1;
String GT_SEPARATOR = ",";
if (groundTruthPath.contains("music")) {
GT_SEPARATOR = " ";
// BBCmusic
labelAtts1 = new HashSet<>(Arrays.asList("<http://purl.org/dc/elements/1.1/replacedle>", "<http://open.vocab.org/terms/sortLabel>", "<http://xmlns.com/foaf/0.1/name>"));
labelAtts2 = new HashSet<>(Arrays.asList("<http://www.w3.org/2000/01/rdf-schema#label>", "<http://dbpedia.org/property/name>", "<http://xmlns.com/foaf/0.1/name>"));
}
if (inputTriples1.contains("rexa")) {
labelAtts1 = new HashSet<>(Arrays.asList("http://xmlns.com/foaf/0.1/name", "http://www.w3.org/2000/01/rdf-schema#label"));
labelAtts2 = labelAtts1;
}
String SEPARATOR = (inputTriples1.endsWith(".tsv")) ? "\t" : " ";
// load the results
JavaPairRDD<Integer, Integer> matches = new LabelMatchingHeuristic().getMatchesFromLabels(jsc.textFile(inputTriples1, PARALLELISM), jsc.textFile(inputTriples2, PARALLELISM), jsc.textFile(entityIds1, PARALLELISM), jsc.textFile(entityIds2, PARALLELISM), SEPARATOR, labelAtts1, labelAtts2);
// Start the evaluation
LongAccumulator TPs = jsc.sc().longAccumulator("TPs");
LongAccumulator FPs = jsc.sc().longAccumulator("FPs");
LongAccumulator FNs = jsc.sc().longAccumulator("FNs");
EvaluateLabelMatchingResults evaluation = new EvaluateLabelMatchingResults();
JavaPairRDD<Integer, Integer> gt;
if (groundTruthPath.contains("estaurant") || groundTruthPath.contains("Rexa_DBLP")) {
GT_SEPARATOR = "\t";
gt = Utils.readGroundTruthIds(jsc.textFile(groundTruthPath), GT_SEPARATOR).cache();
} else {
gt = Utils.getGroundTruthIdsFromEntityIds(jsc.textFile(entityIds1, PARALLELISM), jsc.textFile(entityIds2, PARALLELISM), jsc.textFile(groundTruthPath), GT_SEPARATOR).cache();
}
gt.cache();
System.out.println("Finished loading the ground truth with " + gt.count() + " matches, now evaluating the results...");
evaluation.evaluateResultsNEW(matches, gt, TPs, FPs, FNs);
System.out.println("Evaluation finished successfully.");
EvaluateLabelMatchingResults.printResults(TPs.value(), FPs.value(), FNs.value());
spark.stop();
}
Source : RelationsRank.java
with Apache License 2.0
from vefthym
/**
* return a map of topN neighbors per entity (reversed to point to in-neighbors (values) having the key entity as their top out-neighbor)
* @param rawTriples
* @param SEPARATOR
* @param entityIdsRDD
* @param MIN_SUPPORT_THRESHOLD
* @param N topN neighbors per entity
* @param positiveIds
* @param jsc
* @return
*/
public Map<Integer, IntArrayList> run(JavaRDD<String> rawTriples, String SEPARATOR, JavaRDD<String> entityIdsRDD, float MIN_SUPPORT_THRESHOLD, int N, boolean positiveIds, JavaSparkContext jsc) {
// rawTriples.persist(StorageLevel.MEMORY_AND_DISK_SER());
// List<String> subjects = Utils.getEntityUrlsFromEntityRDDInOrder(rawTriples, SEPARATOR); //a list of (distinct) subject URLs, keeping insertion order (from original triples file)
// Object2IntOpenHashMap<String> subjects = Utils.getEntityIdsMapping(rawTriples, SEPARATOR);
Object2IntOpenHashMap<String> entityIds = Utils.readEntityIdsMapping(entityIdsRDD, positiveIds);
System.out.println("Found " + entityIds.size() + " entities in collection " + (positiveIds ? "1" : "2"));
long numEntitiesSquared = (long) entityIds.keySet().size();
numEntitiesSquared *= numEntitiesSquared;
Broadcast<Object2IntOpenHashMap<String>> entityIds_BV = jsc.broadcast(entityIds);
// a list of (s,o) for each predicate
JavaPairRDD<String, List<Tuple2<Integer, Integer>>> relationIndex = getRelationIndex(rawTriples, SEPARATOR, entityIds_BV);
// rawTriples.unpersist();
relationIndex.persist(StorageLevel.MEMORY_AND_DISK_SER());
List<String> relationsRank = getRelationsRank(relationIndex, MIN_SUPPORT_THRESHOLD, numEnreplacediesSquared);
System.out.println("Top-5 relations in collection " + (positiveIds ? "1: " : "2: ") + Arrays.toString(relationsRank.subList(0, Math.min(5, relationsRank.size())).toArray()));
// action
JavaPairRDD<Integer, IntArrayList> topOutNeighbors = getTopOutNeighborsPerEntity(relationIndex, relationsRank, N, positiveIds);
relationIndex.unpersist();
// reverse the outNeighbors, to get in neighbors
Map<Integer, IntArrayList> inNeighbors = topOutNeighbors.flatMapToPair(x -> {
// reverse the neighbor pairs from (in,[out1,out2,out3]) to (out1,in), (out2,in), (out3,in)
List<Tuple2<Integer, Integer>> inNeighbs = new ArrayList<>();
for (int outNeighbor : x._2()) {
inNeighbs.add(new Tuple2<>(outNeighbor, x._1()));
}
return inNeighbs.iterator();
}).aggregateByKey(new IntOpenHashSet(), (x, y) -> {
x.add(y);
return x;
}, (x, y) -> {
x.addAll(y);
return x;
}).mapValues(x -> new IntArrayList(x)).collectAsMap();
return inNeighbors;
}
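The final step of run, which turns the (entity, top out-neighbors) pairs into in-neighbor lists, is a general flatMapToPair / aggregateByKey reversal pattern. Below is a self-contained toy sketch of the same pattern, with made-up sample data and plain Java collections instead of the fastutil types used above:
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;
public class NeighborReversalSketch {
    public static void main(String[] args) {
        JavaSparkContext jsc = new JavaSparkContext(new SparkConf().setAppName("reverse").setMaster("local[*]"));
        // (entity, its top out-neighbors) -- made-up sample data
        List<Tuple2<Integer, List<Integer>>> outNeighbors = Arrays.asList(
            new Tuple2<>(1, Arrays.asList(2, 3)),
            new Tuple2<>(2, Arrays.asList(3)));
        JavaPairRDD<Integer, List<Integer>> topOutNeighbors = jsc.parallelizePairs(outNeighbors);
        // reverse (in, [out1, out2]) into (out1, in), (out2, in), then group the in-neighbors per key
        Map<Integer, HashSet<Integer>> inNeighbors = topOutNeighbors.flatMapToPair(x -> {
            List<Tuple2<Integer, Integer>> pairs = new ArrayList<>();
            for (int out : x._2()) {
                pairs.add(new Tuple2<>(out, x._1()));
            }
            return pairs.iterator();
        }).aggregateByKey(new HashSet<Integer>(),
            (set, v) -> { set.add(v); return set; },
            (s1, s2) -> { s1.addAll(s2); return s1; })
        .collectAsMap();
        System.out.println(inNeighbors); // e.g. {2=[1], 3=[1, 2]}
        jsc.stop();
    }
}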
Source : CNPNeighbors.java
with Apache License 2.0
from vefthym
/**
* @param topKvalueCandidates the topK results per entity, acquired from value similarity
* @param rawTriples1 the rdf triples of the first entity collection
* @param rawTriples2 the rdf triples of the second entity collection
* @param SEPARATOR the delimiter that separates subjects, predicates and objects in the rawTriples1 and rawTriples2 files
* @param entityIds1 the mapping of entity urls to entity ids, as it was used in blocking
* @param entityIds2
* @param MIN_SUPPORT_THRESHOLD the minimum support threshold, below which, relations are discarded from top relations
* @param K the K for topK candidate matches
* @param N the N for topN rdf relations (and neighbors)
* @param jsc the java spark context used to load files and broadcast variables
* @return topK neighbor candidates per entity
*/
public JavaPairRDD<Integer, IntArrayList> run(JavaPairRDD<Integer, Int2FloatLinkedOpenHashMap> topKvalueCandidates, JavaRDD<String> rawTriples1, JavaRDD<String> rawTriples2, String SEPARATOR, JavaRDD<String> entityIds1, JavaRDD<String> entityIds2, float MIN_SUPPORT_THRESHOLD, int K, int N, JavaSparkContext jsc) {
Map<Integer, IntArrayList> inNeighbors = new HashMap<>(new RelationsRank().run(rawTriples1, SEPARATOR, entityIds1, MIN_SUPPORT_THRESHOLD, N, true, jsc));
inNeighbors.putAll(new RelationsRank().run(rawTriples2, SEPARATOR, entityIds2, MIN_SUPPORT_THRESHOLD, N, false, jsc));
Broadcast<Map<Integer, IntArrayList>> inNeighbors_BV = jsc.broadcast(inNeighbors);
// JavaPairRDD<Tuple2<Integer, Integer>, Float> neighborSims = getNeighborSims(topKvalueCandidates, inNeighbors_BV);
// JavaPairRDD<Integer, IntArrayList> topKneighborCandidates = getTopKNeighborSimsOld(neighborSims, K);
JavaPairRDD<Integer, IntArrayList> topKneighborCandidates = getTopKNeighborSims(topKvalueCandidates, inNeighbors_BV, K);
return topKneighborCandidates;
}
Source : CNPARCS.java
with Apache License 2.0
from vefthym
/**
* @param topKvalueCandidates the topK results per entity, acquired from value similarity
* @param rawTriples1 the rdf triples of the first entity collection
* @param rawTriples2 the rdf triples of the second entity collection
* @param SEPARATOR the delimiter that separates subjects, predicates and objects in the rawTriples1 and rawTriples2 files
* @param entityIds1 the mapping of entity urls to entity ids, as it was used in blocking
* @param entityIds2
* @param MIN_SUPPORT_THRESHOLD the minimum support threshold, below which, relations are discarded from top relations
* @param K the K for topK candidate matches
* @param N the N for topN rdf relations (and neighbors)
* @param jsc the java spark context used to load files and broadcast variables
* @return topK neighbor candidates per entity
*/
public JavaPairRDD<Integer, Int2FloatLinkedOpenHashMap> run2(JavaPairRDD<Integer, Int2FloatLinkedOpenHashMap> topKvalueCandidates, JavaRDD<String> rawTriples1, JavaRDD<String> rawTriples2, String SEPARATOR, JavaRDD<String> entityIds1, JavaRDD<String> entityIds2, float MIN_SUPPORT_THRESHOLD, int K, int N, JavaSparkContext jsc) {
Map<Integer, IntArrayList> inNeighbors = new HashMap<>(new RelationsRank().run(rawTriples1, SEPARATOR, entityIds1, MIN_SUPPORT_THRESHOLD, N, true, jsc));
inNeighbors.putAll(new RelationsRank().run(rawTriples2, SEPARATOR, entityIds2, MIN_SUPPORT_THRESHOLD, N, false, jsc));
Broadcast<Map<Integer, IntArrayList>> inNeighbors_BV = jsc.broadcast(inNeighbors);
JavaPairRDD<Integer, Int2FloatLinkedOpenHashMap> topKneighborCandidates = getTopKNeighborSimsSUMWithScores(topKvalueCandidates, inNeighbors_BV, K);
return topKneighborCandidates;
}
Source : CNPARCS.java
with Apache License 2.0
from vefthym
/**
* @param topKvalueCandidates the topK results per entity, acquired from value similarity
* @param rawTriples1 the rdf triples of the first entity collection
* @param rawTriples2 the rdf triples of the second entity collection
* @param SEPARATOR the delimiter that separates subjects, predicates and objects in the rawTriples1 and rawTriples2 files
* @param entityIds1 the mapping of entity urls to entity ids, as it was used in blocking
* @param entityIds2
* @param MIN_SUPPORT_THRESHOLD the minimum support threshold, below which, relations are discarded from top relations
* @param K the K for topK candidate matches
* @param N the N for topN rdf relations (and neighbors)
* @param jsc the java spark context used to load files and broadcast variables
* @return topK neighbor candidates per entity
*/
public JavaPairRDD<Integer, IntArrayList> run(JavaPairRDD<Integer, Int2FloatLinkedOpenHashMap> topKvalueCandidates, JavaRDD<String> rawTriples1, JavaRDD<String> rawTriples2, String SEPARATOR, JavaRDD<String> entityIds1, JavaRDD<String> entityIds2, float MIN_SUPPORT_THRESHOLD, int K, int N, JavaSparkContext jsc) {
Map<Integer, IntArrayList> inNeighbors = new HashMap<>(new RelationsRank().run(rawTriples1, SEPARATOR, entityIds1, MIN_SUPPORT_THRESHOLD, N, true, jsc));
inNeighbors.putAll(new RelationsRank().run(rawTriples2, SEPARATOR, entityIds2, MIN_SUPPORT_THRESHOLD, N, false, jsc));
Broadcast<Map<Integer, IntArrayList>> inNeighbors_BV = jsc.broadcast(inNeighbors);
JavaPairRDD<Integer, IntArrayList> topKneighborCandidates = getTopKNeighborSimsSUM(topKvalueCandidates, inNeighbors_BV, K);
return topKneighborCandidates;
}
Source : AgeClassifySparkTrainer.java
with Apache License 2.0
from USCDataScience
public static AgeClassifyModel createModel(String languageCode, String dataIn, String tokenizer, String featureGenerators, TrainingParameters trainParams) throws IOException {
SparkConf conf = new SparkConf().setAppName("AgeClassifySparkTrainer");
JavaSparkContext sc = new JavaSparkContext(conf);
AgeClassifyContextGeneratorWrapper wrapper = new AgeClassifyContextGeneratorWrapper(tokenizer, featureGenerators);
JavaRDD<String> data = sc.textFile(dataIn, 8).cache();
JavaRDD<EventWrapper> samples = data.map(new CreateEvents(wrapper)).cache();
/*
JavaRDD<EventWrapper> samples = data.map(
new Function<String, EventWrapper>() {
public EventWrapper call(String s) {
String[] parts = s.split(",");
try {
if (parts[0] != "-1") {
Integer value = Integer.parseInt(parts[0]);
String[] text = parts[2].split(" ");
return new EventWrapper(value, text);
} else {
String cat = parts[1];
String[] text = parts[2].split(" ");
return new EventWrapper(cat, text);
}
} catch(Exception e) {
return null;
}
}
});
*/
JavaRDD<EventWrapper> validSamples = samples.filter(new Function<EventWrapper, Boolean>() {
@Override
public Boolean call(EventWrapper s) {
return s != null;
}
}).cache();
// ObjectStream<Event> eventStream = EventStreamUtil.createEventStream(samples);
ObjectStream<Event> eventStream = EventStreamUtil.createEventStream(validSamples.collect());
Map<String, String> entries = new HashMap<String, String>();
EventTrainer trainer = AgeClassifyTrainerFactory.getEventTrainer(trainParams.getSettings(), entries);
MaxentModel ageModel = trainer.train(eventStream);
samples.unpersist();
data.unpersist();
sc.stop();
Map<String, String> manifestInfoEntries = new HashMap<String, String>();
AgeClassifyFactory factory = AgeClassifyFactory.create("AgeClassifyFactory", wrapper.getTokenizer(), wrapper.getFeatureGenerators());
return new AgeClassifyModel(languageCode, ageModel, manifestInfoEntries, factory);
}
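The null filter above, written with an anonymous Function, can equally be expressed as a lambda or a method reference on the same JavaRDD (a minimal sketch, same behavior):
JavaRDD<EventWrapper> validSamples = samples.filter(s -> s != null).cache();
// or, equivalently, with a method reference
JavaRDD<EventWrapper> validSamples2 = samples.filter(java.util.Objects::nonNull).cache();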
Source : DataFrameRowFrameConversionTest.java
with Apache License 2.0
from tugraz-isds
public class DataFrameRowFrameConversionTest extends AutomatedTestBase {
private final static String TEST_DIR = "functions/mlcontext/";
private final static String TEST_NAME = "DataFrameConversion";
private final static String TEST_CLASS_DIR = TEST_DIR + DataFrameRowFrameConversionTest.class.getSimpleName() + "/";
private final static int rows1 = 1045;
private final static int cols1 = 545;
private final static int cols2 = 864;
private final static double sparsity1 = 0.9;
private final static double sparsity2 = 0.1;
private final static double eps = 0.0000000001;
private static SparkSession spark;
private static JavaSparkContext sc;
@BeforeClass
public static void setUpClass() {
spark = SparkSession.builder().appName("DataFrameRowFrameConversionTest").master("local").config("spark.memory.offHeap.enabled", "false").config("spark.sql.codegen.wholeStage", "false").getOrCreate();
sc = new JavaSparkContext(spark.sparkContext());
}
@Override
public void setUp() {
addTestConfiguration(TEST_NAME, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME, new String[] { "A", "B" }));
}
@Test
public void testRowDoubleConversionSingleDense() {
testDataFrameConversion(ValueType.FP64, true, true, false);
}
@Test
public void testRowDoubleConversionSingleDenseUnknown() {
testDataFrameConversion(ValueType.FP64, true, true, true);
}
@Test
public void testRowDoubleConversionSingleSparse() {
testDataFrameConversion(ValueType.FP64, true, false, false);
}
@Test
public void testRowDoubleConversionSingleSparseUnknown() {
testDataFrameConversion(ValueType.FP64, true, false, true);
}
@Test
public void testRowDoubleConversionMultiDense() {
testDataFrameConversion(ValueType.FP64, false, true, false);
}
@Test
public void testRowDoubleConversionMultiDenseUnknown() {
testDataFrameConversion(ValueType.FP64, false, true, true);
}
@Test
public void testRowDoubleConversionMultiSparse() {
testDataFrameConversion(ValueType.FP64, false, false, false);
}
@Test
public void testRowDoubleConversionMultiSparseUnknown() {
testDataFrameConversion(ValueType.FP64, false, false, true);
}
@Test
public void testRowStringConversionSingleDense() {
testDataFrameConversion(ValueType.STRING, true, true, false);
}
@Test
public void testRowStringConversionSingleDenseUnknown() {
testDataFrameConversion(ValueType.STRING, true, true, true);
}
@Test
public void testRowStringConversionSingleSparse() {
testDataFrameConversion(ValueType.STRING, true, false, false);
}
@Test
public void testRowStringConversionSingleSparseUnknown() {
testDataFrameConversion(ValueType.STRING, true, false, true);
}
@Test
public void testRowStringConversionMultiDense() {
testDataFrameConversion(ValueType.STRING, false, true, false);
}
@Test
public void testRowStringConversionMultiDenseUnknown() {
testDataFrameConversion(ValueType.STRING, false, true, true);
}
@Test
public void testRowStringConversionMultiSparse() {
testDataFrameConversion(ValueType.STRING, false, false, false);
}
@Test
public void testRowStringConversionMultiSparseUnknown() {
testDataFrameConversion(ValueType.STRING, false, false, true);
}
@Test
public void testRowLongConversionSingleDense() {
testDataFrameConversion(ValueType.INT64, true, true, false);
}
@Test
public void testRowLongConversionSingleDenseUnknown() {
testDataFrameConversion(ValueType.INT64, true, true, true);
}
@Test
public void testRowLongConversionSingleSparse() {
testDataFrameConversion(ValueType.INT64, true, false, false);
}
@Test
public void testRowLongConversionSingleSparseUnknown() {
testDataFrameConversion(ValueType.INT64, true, false, true);
}
@Test
public void testRowLongConversionMultiDense() {
testDataFrameConversion(ValueType.INT64, false, true, false);
}
@Test
public void testRowLongConversionMultiDenseUnknown() {
testDataFrameConversion(ValueType.INT64, false, true, true);
}
@Test
public void testRowLongConversionMultiSparse() {
testDataFrameConversion(ValueType.INT64, false, false, false);
}
@Test
public void testRowLongConversionMultiSparseUnknown() {
testDataFrameConversion(ValueType.INT64, false, false, true);
}
private void testDataFrameConversion(ValueType vt, boolean singleColBlock, boolean dense, boolean unknownDims) {
boolean oldConfig = DMLScript.USE_LOCAL_SPARK_CONFIG;
ExecMode oldPlatform = DMLScript.getGlobalExecMode();
try {
DMLScript.USE_LOCAL_SPARK_CONFIG = true;
DMLScript.setGlobalExecMode(ExecMode.HYBRID);
// generate input data and setup metadata
int cols = singleColBlock ? cols1 : cols2;
double sparsity = dense ? sparsity1 : sparsity2;
double[][] A = getRandomMatrix(rows1, cols, -10, 10, sparsity, 2373);
A = (vt == ValueType.INT64) ? TestUtils.round(A) : A;
MatrixBlock mbA = DataConverter.convertToMatrixBlock(A);
FrameBlock fbA = DataConverter.convertToFrameBlock(mbA, vt);
int blksz = ConfigurationManager.getBlocksize();
MatrixCharacteristics mc1 = new MatrixCharacteristics(rows1, cols, blksz, mbA.getNonZeros());
MatrixCharacteristics mc2 = unknownDims ? new MatrixCharacteristics() : new MatrixCharacteristics(mc1);
ValueType[] schema = UtilFunctions.nCopies(cols, vt);
// get binary block input rdd
JavaPairRDD<Long, FrameBlock> in = SparkExecutionContext.toFrameJavaPairRDD(sc, fbA);
// frame - dataframe - frame conversion
Dataset<Row> df = FrameRDDConverterUtils.binaryBlockToDataFrame(spark, in, mc1, schema);
JavaPairRDD<Long, FrameBlock> out = FrameRDDConverterUtils.dataFrameToBinaryBlock(sc, df, mc2, true);
// get output frame block
FrameBlock fbB = SparkExecutionContext.toFrameBlock(out, schema, rows1, cols);
// compare frame blocks
MatrixBlock mbB = DataConverter.convertToMatrixBlock(fbB);
double[][] B = DataConverter.convertToDoubleMatrix(mbB);
TestUtils.compareMatrices(A, B, rows1, cols, eps);
} catch (Exception ex) {
throw new RuntimeException(ex);
} finally {
DMLScript.USE_LOCAL_SPARK_CONFIG = oldConfig;
DMLScript.setGlobalExecMode(oldPlatform);
}
}
@AfterClass
public static void tearDownClass() {
// stop underlying spark context to allow single jvm tests (otherwise the
// next test that tries to create a SparkContext would fail)
spark.stop();
sc = null;
spark = null;
}
}
Source : DataFrameMatrixConversionTest.java
with Apache License 2.0
from tugraz-isds
public class DataFrameMatrixConversionTest extends AutomatedTestBase {
private final static String TEST_DIR = "functions/mlcontext/";
private final static String TEST_NAME = "DataFrameConversion";
private final static String TEST_CLASS_DIR = TEST_DIR + DataFrameMatrixConversionTest.class.getSimpleName() + "/";
private final static int rows1 = 2245;
private final static int rows3 = 7;
private final static int cols1 = 745;
private final static int cols2 = 1264;
private final static int cols3 = 10038;
private final static double sparsity1 = 0.9;
private final static double sparsity2 = 0.1;
private final static double eps = 0.0000000001;
private static SparkSession spark;
private static JavaSparkContext sc;
@BeforeClass
public static void setUpClass() {
spark = createSystemDSSparkSession("DataFrameMatrixConversionTest", "local");
sc = new JavaSparkContext(spark.sparkContext());
LazyWriteBuffer.init();
}
@Override
public void setUp() {
addTestConfiguration(TEST_NAME, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME, new String[] { "A", "B" }));
}
@Test
public void testVectorConversionSingleDense() {
testDataFrameConversion(true, cols1, true, false);
}
@Test
public void testVectorConversionSingleDenseUnknown() {
testDataFrameConversion(true, cols1, true, true);
}
@Test
public void testVectorConversionSingleSparse() {
testDataFrameConversion(true, cols1, false, false);
}
@Test
public void testVectorConversionSingleSparseUnknown() {
testDataFrameConversion(true, cols1, false, true);
}
@Test
public void testVectorConversionMultiDense() {
testDataFrameConversion(true, cols2, true, false);
}
@Test
public void testVectorConversionMultiDenseUnknown() {
testDataFrameConversion(true, cols2, true, true);
}
@Test
public void testVectorConversionMultiSparse() {
testDataFrameConversion(true, cols2, false, false);
}
@Test
public void testVectorConversionMultiSparseUnknown() {
testDataFrameConversion(true, cols2, false, true);
}
@Test
public void testRowConversionSingleDense() {
testDataFrameConversion(false, cols1, true, false);
}
@Test
public void testRowConversionSingleDenseUnknown() {
testDataFrameConversion(false, cols1, true, true);
}
@Test
public void testRowConversionSingleSparse() {
testDataFrameConversion(false, cols1, false, false);
}
@Test
public void testRowConversionSingleSparseUnknown() {
testDataFrameConversion(false, cols1, false, true);
}
@Test
public void testRowConversionMultiDense() {
testDataFrameConversion(false, cols2, true, false);
}
@Test
public void testRowConversionMultiDenseUnknown() {
testDataFrameConversion(false, cols2, true, true);
}
@Test
public void testRowConversionMultiSparse() {
testDataFrameConversion(false, cols2, false, false);
}
@Test
public void testRowConversionMultiSparseUnknown() {
testDataFrameConversion(false, cols2, false, true);
}
@Test
public void testVectorConversionWideDense() {
testDataFrameConversion(true, cols3, true, false);
}
@Test
public void testVectorConversionWideDenseUnknown() {
testDataFrameConversion(true, cols3, true, true);
}
@Test
public void testVectorConversionWideSparse() {
testDataFrameConversion(true, cols3, false, false);
}
@Test
public void testVectorConversionWideSparseUnknown() {
testDataFrameConversion(true, cols3, false, true);
}
@Test
public void testVectorConversionMultiUltraSparse() {
testDataFrameConversionUltraSparse(true, false);
}
@Test
public void testVectorConversionMultiUltraSparseUnknown() {
testDataFrameConversionUltraSparse(true, true);
}
@Test
public void testRowConversionMultiUltraSparse() {
testDataFrameConversionUltraSparse(false, false);
}
@Test
public void testRowConversionMultiUltraSparseUnknown() {
testDataFrameConversionUltraSparse(false, true);
}
private void testDataFrameConversion(boolean vector, int cols, boolean dense, boolean unknownDims) {
boolean oldConfig = DMLScript.USE_LOCAL_SPARK_CONFIG;
ExecMode oldPlatform = DMLScript.getGlobalExecMode();
try {
DMLScript.USE_LOCAL_SPARK_CONFIG = true;
DMLScript.setGlobalExecMode(ExecMode.HYBRID);
// generate input data and setup metadata
int rows = (cols == cols3) ? rows3 : rows1;
double sparsity = dense ? sparsity1 : sparsity2;
double[][] A = getRandomMatrix(rows, cols, -10, 10, sparsity, 2373);
MatrixBlock mbA = DataConverter.convertToMatrixBlock(A);
int blksz = ConfigurationManager.getBlocksize();
MatrixCharacteristics mc1 = new MatrixCharacteristics(rows, cols, blksz, mbA.getNonZeros());
MatrixCharacteristics mc2 = unknownDims ? new MatrixCharacteristics() : new MatrixCharacteristics(mc1);
// get binary block input rdd
JavaPairRDD<MatrixIndexes, MatrixBlock> in = SparkExecutionContext.toMatrixJavaPairRDD(sc, mbA, blksz);
// matrix - dataframe - matrix conversion
Dataset<Row> df = RDDConverterUtils.binaryBlockToDataFrame(spark, in, mc1, vector);
df = (rows == rows3) ? df.repartition(rows) : df;
JavaPairRDD<MatrixIndexes, MatrixBlock> out = RDDConverterUtils.dataFrameToBinaryBlock(sc, df, mc2, true, vector);
// get output matrix block
MatrixBlock mbB = SparkExecutionContext.toMatrixBlock(out, rows, cols, blksz, -1);
// compare matrix blocks
double[][] B = DataConverter.convertToDoubleMatrix(mbB);
TestUtils.compareMatrices(A, B, rows, cols, eps);
} catch (Exception ex) {
throw new RuntimeException(ex);
} finally {
DMLScript.USE_LOCAL_SPARK_CONFIG = oldConfig;
DMLScript.setGlobalExecMode(oldPlatform);
}
}
private void testDataFrameConversionUltraSparse(boolean vector, boolean unknownDims) {
boolean oldConfig = DMLScript.USE_LOCAL_SPARK_CONFIG;
ExecMode oldPlatform = DMLScript.getGlobalExecMode();
try {
DMLScript.USE_LOCAL_SPARK_CONFIG = true;
DMLScript.setGlobalExecMode(ExecMode.HYBRID);
// generate input data and setup metadata
double[][] A = getRandomMatrix(rows1, 1, -10, 10, 0.7, 2373);
MatrixBlock mbA0 = DataConverter.convertToMatrixBlock(A);
MatrixBlock mbA = LibMatrixReorg.diag(mbA0, new MatrixBlock(rows1, rows1, true));
int blksz = ConfigurationManager.getBlocksize();
MatrixCharacteristics mc1 = new MatrixCharacteristics(rows1, rows1, blksz, mbA.getNonZeros());
MatrixCharacteristics mc2 = unknownDims ? new MatrixCharacteristics() : new MatrixCharacteristics(mc1);
// get binary block input rdd
JavaPairRDD<MatrixIndexes, MatrixBlock> in = SparkExecutionContext.toMatrixJavaPairRDD(sc, mbA, blksz);
// matrix - dataframe - matrix conversion
Dataset<Row> df = RDDConverterUtils.binaryBlockToDataFrame(spark, in, mc1, vector);
JavaPairRDD<MatrixIndexes, MatrixBlock> out = RDDConverterUtils.dataFrameToBinaryBlock(sc, df, mc2, true, vector);
// get output matrix block
MatrixBlock mbB0 = SparkExecutionContext.toMatrixBlock(out, rows1, rows1, blksz, -1);
MatrixBlock mbB = LibMatrixReorg.diag(mbB0, new MatrixBlock(rows1, 1, false));
// compare matrix blocks
double[][] B = DataConverter.convertToDoubleMatrix(mbB);
TestUtils.compareMatrices(A, B, rows1, 1, eps);
} catch (Exception ex) {
throw new RuntimeException(ex);
} finally {
DMLScript.USE_LOCAL_SPARK_CONFIG = oldConfig;
DMLScript.setGlobalExecMode(oldPlatform);
}
}
@AfterClass
public static void tearDownClass() {
// stop underlying spark context to allow single jvm tests (otherwise the
// next test that tries to create a SparkContext would fail)
spark.stop();
sc = null;
spark = null;
LazyWriteBuffer.cleanup();
}
}
Source : RDDConverterUtilsExt.java
with Apache License 2.0
from tugraz-isds
public static JavaPairRDD<MatrixIndexes, MatrixBlock> coordinateMatrixToBinaryBlock(JavaSparkContext sc, CoordinateMatrix input, DataCharacteristics mcIn, boolean outputEmptyBlocks) {
// convert matrix entry rdd to binary block rdd (w/ partial blocks)
JavaPairRDD<MatrixIndexes, MatrixBlock> out = input.entries().toJavaRDD().mapPartitionsToPair(new MatrixEntryToBinaryBlockFunction(mcIn));
// inject empty blocks (if necessary)
if (outputEmptyBlocks && mcIn.mightHaveEmptyBlocks()) {
out = out.union(SparkUtils.getEmptyBlockRDD(sc, mcIn));
}
// aggregate partial matrix blocks
out = RDDAggregateUtils.mergeByKey(out, false);
return out;
}
Source : RDDConverterUtils.java
with Apache License 2.0
from tugraz-isds
public static JavaPairRDD<MatrixIndexes, MatrixBlock> textCellToBinaryBlock(JavaSparkContext sc, JavaPairRDD<LongWritable, Text> input, DataCharacteristics mcOut, boolean outputEmptyBlocks, FileFormatPropertiesMM mmProps) {
// convert textcell rdd to binary block rdd (w/ partial blocks)
JavaPairRDD<MatrixIndexes, MatrixBlock> out = input.values().mapPartitionsToPair(new TextToBinaryBlockFunction(mcOut, mmProps));
// inject empty blocks (if necessary)
if (outputEmptyBlocks && mcOut.mightHaveEmptyBlocks()) {
out = out.union(SparkUtils.getEmptyBlockRDD(sc, mcOut));
}
// aggregate partial matrix blocks
out = RDDAggregateUtils.mergeByKey(out, false);
return out;
}
Source : RDDConverterUtils.java
with Apache License 2.0
from tugraz-isds
/**
* Converts a libsvm text input file into two binary block matrices for features
* and labels, and saves these to the specified output files. This call also deletes
* existing files at the specified output locations, as well as determines and
* writes the meta data files of both output matrices.
* <p>
* Note: We use {@code org.apache.spark.mllib.util.MLUtils.loadLibSVMFile} for parsing
* the libsvm input files in order to ensure consistency with Spark.
*
* @param sc java spark context
* @param pathIn path to libsvm input file
* @param pathX path to binary block output file of features
* @param pathY path to binary block output file of labels
* @param mcOutX matrix characteristics of output matrix X
*/
public static void libsvmToBinaryBlock(JavaSparkContext sc, String pathIn, String pathX, String pathY, DataCharacteristics mcOutX) {
if (!mcOutX.dimsKnown())
throw new DMLRuntimeException("Matrix characteristics " + "required to convert sparse input representation.");
try {
// cleanup existing output files
HDFSTool.deleteFileIfExistOnHDFS(pathX);
HDFSTool.deleteFileIfExistOnHDFS(pathY);
// convert libsvm to labeled points
int numFeatures = (int) mcOutX.getCols();
int numPartitions = SparkUtils.getNumPreferredPartitions(mcOutX, null);
JavaRDD<org.apache.spark.mllib.regression.LabeledPoint> lpoints = MLUtils.loadLibSVMFile(sc.sc(), pathIn, numFeatures, numPartitions).toJavaRDD();
// append row index and best-effort caching to avoid repeated text parsing
JavaPairRDD<org.apache.spark.mllib.regression.LabeledPoint, Long> ilpoints = lpoints.zipWithIndex().persist(StorageLevel.MEMORY_AND_DISK());
// extract labels and convert to binary block
DataCharacteristics mc1 = new MatrixCharacteristics(mcOutX.getRows(), 1, mcOutX.getBlocksize(), -1);
LongAccumulator aNnz1 = sc.sc().longAccumulator("nnz");
JavaPairRDD<MatrixIndexes, MatrixBlock> out1 = ilpoints.mapPartitionsToPair(new LabeledPointToBinaryBlockFunction(mc1, true, aNnz1));
int numPartitions2 = SparkUtils.getNumPreferredPartitions(mc1, null);
out1 = RDDAggregateUtils.mergeByKey(out1, numPartitions2, false);
out1.saveAsHadoopFile(pathY, MatrixIndexes.class, MatrixBlock.class, SequenceFileOutputFormat.class);
// update nnz after triggered save
mc1.setNonZeros(aNnz1.value());
HDFSTool.writeMetaDataFile(pathY + ".mtd", ValueType.FP64, mc1, OutputInfo.BinaryBlockOutputInfo);
// extract data and convert to binary block
DataCharacteristics mc2 = new MatrixCharacteristics(mcOutX.getRows(), mcOutX.getCols(), mcOutX.getBlocksize(), -1);
LongAccumulator aNnz2 = sc.sc().longAccumulator("nnz");
JavaPairRDD<MatrixIndexes, MatrixBlock> out2 = ilpoints.mapPartitionsToPair(new LabeledPointToBinaryBlockFunction(mc2, false, aNnz2));
out2 = RDDAggregateUtils.mergeByKey(out2, numPartitions, false);
out2.saveAsHadoopFile(pathX, MatrixIndexes.class, MatrixBlock.class, SequenceFileOutputFormat.class);
// update nnz after triggered save
mc2.setNonZeros(aNnz2.value());
HDFSTool.writeMetaDataFile(pathX + ".mtd", ValueType.FP64, mc2, OutputInfo.BinaryBlockOutputInfo);
// asynchronous cleanup of cached intermediates
ilpoints.unpersist(false);
} catch (IOException ex) {
throw new DMLRuntimeException(ex);
}
}
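A hypothetical invocation of libsvmToBinaryBlock, in the same fragment style as the snippets above; the HDFS paths, dimensions, and blocksize are made-up placeholders, and only the call signature and the MatrixCharacteristics constructor are taken from the code above:
JavaSparkContext sc = new JavaSparkContext(new SparkConf().setAppName("libsvmToBinaryBlock"));
// dimensions must be known up front, otherwise the method throws a DMLRuntimeException
DataCharacteristics mcX = new MatrixCharacteristics(1000000, 784, 1000, -1);
RDDConverterUtils.libsvmToBinaryBlock(sc, "hdfs:/data/in.libsvm", "hdfs:/out/X", "hdfs:/out/Y", mcX);
sc.stop();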
Source : RDDConverterUtils.java
with Apache License 2.0
from tugraz-isds
public static JavaPairRDD<MatrixIndexes, MatrixBlock> csvToBinaryBlock(JavaSparkContext sc, JavaRDD<String> input, DataCharacteristics mcOut, boolean hasHeader, String delim, boolean fill, double fillValue) {
// convert string rdd to serializable longwritable/text
JavaPairRDD<LongWritable, Text> prepinput = input.mapToPair(new StringToSerTextFunction());
// convert to binary block
return csvToBinaryBlock(sc, prepinput, mcOut, hasHeader, delim, fill, fillValue);
}
Source : RDDConverterUtils.java
with Apache License 2.0
from tugraz-isds
public static JavaPairRDD<MatrixIndexes, MatrixBlock> binaryCellToBinaryBlock(JavaSparkContext sc, JavaPairRDD<MatrixIndexes, MatrixCell> input, DataCharacteristics mcOut, boolean outputEmptyBlocks) {
// convert binarycell rdd to binary block rdd (w/ partial blocks)
JavaPairRDD<MatrixIndexes, MatrixBlock> out = input.mapPartitionsToPair(new BinaryCellToBinaryBlockFunction(mcOut));
// inject empty blocks (if necessary)
if (outputEmptyBlocks && mcOut.mightHaveEmptyBlocks()) {
out = out.union(SparkUtils.getEmptyBlockRDD(sc, mcOut));
}
// aggregate partial matrix blocks
out = RDDAggregateUtils.mergeByKey(out, false);
return out;
}
Source : RDDConverterUtils.java
with Apache License 2.0
from tugraz-isds
public static JavaPairRDD<MatrixIndexes, MatrixBlock> dataFrameToBinaryBlock(JavaSparkContext sc, Dataset<Row> df, DataCharacteristics mc, boolean containsID, boolean isVector) {
// determine unknown dimensions and sparsity if required
if (!mc.dimsKnown(true)) {
LongAccumulator aNnz = sc.sc().longAccumulator("nnz");
JavaRDD<Row> tmp = df.javaRDD().map(new DataFrameAnalysisFunction(aNnz, containsID, isVector));
long rlen = tmp.count();
long clen = !isVector ? df.columns().length - (containsID ? 1 : 0) : ((Vector) tmp.first().get(containsID ? 1 : 0)).size();
long nnz = UtilFunctions.toLong(aNnz.value());
mc.set(rlen, clen, mc.getBlocksize(), nnz);
}
// ensure valid blocksizes
if (mc.getBlocksize() <= 1)
mc.setBlocksize(ConfigurationManager.getBlocksize());
// construct or reuse row ids
JavaPairRDD<Row, Long> prepinput = containsID ? df.javaRDD().mapToPair(new DataFrameExtractIDFunction(df.schema().fieldIndex(DF_ID_COLUMN))) : // zip row index
df.javaRDD().zipWithIndex();
// convert csv rdd to binary block rdd (w/ partial blocks)
boolean sparse = requiresSparseAllocation(prepinput, mc);
JavaPairRDD<MatrixIndexes, MatrixBlock> out = prepinput.mapPartitionsToPair(new DataFrameToBinaryBlockFunction(mc, sparse, containsID, isVector));
// aggregate partial matrix blocks (w/ preferred number of output
// partitions as the data is likely smaller in binary block format,
// but also to bound the size of partitions for compressed inputs)
int parts = SparkUtils.getNumPreferredPartitions(mc, out);
return RDDAggregateUtils.mergeByKey(out, parts, false);
}
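A hypothetical call to this DataFrame-to-binary-block conversion, again as a fragment; the Parquet input path is a placeholder, and an empty MatrixCharacteristics (as used in the tests above) lets the method determine dimensions and sparsity itself:
SparkSession spark = SparkSession.builder().appName("df2bb").master("local[*]").getOrCreate();
JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
Dataset<Row> df = spark.read().parquet("hdfs:/data/features.parquet"); // placeholder input
DataCharacteristics mc = new MatrixCharacteristics(); // unknown dims, computed by the method
JavaPairRDD<MatrixIndexes, MatrixBlock> blocks =
    RDDConverterUtils.dataFrameToBinaryBlock(sc, df, mc, false, false); // no ID column, no Vector column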
Source : RDDConverterUtils.java
with Apache License 2.0
from tugraz-isds
public static JavaPairRDD<MatrixIndexes, MatrixBlock> csvToBinaryBlock(JavaSparkContext sc, JavaPairRDD<LongWritable, Text> input, DataCharacteristics mc, boolean hasHeader, String delim, boolean fill, double fillValue) {
// determine unknown dimensions and sparsity if required
// (w/ robustness for mistakenly counted header in nnz)
if (!mc.dimsKnown(true)) {
LongAccumulator aNnz = sc.sc().longAccumulator("nnz");
JavaRDD<String> tmp = input.values().map(new CSVAnalysisFunction(aNnz, delim));
long rlen = tmp.count() - (hasHeader ? 1 : 0);
long clen = tmp.first().split(delim).length;
long nnz = Math.min(rlen * clen, UtilFunctions.toLong(aNnz.value()));
mc.set(rlen, clen, mc.getBlocksize(), nnz);
}
// prepare csv w/ row indexes (sorted by filenames)
JavaPairRDD<Text, Long> prepinput = input.values().zipWithIndex();
// convert csv rdd to binary block rdd (w/ partial blocks)
boolean sparse = requiresSparseAllocation(prepinput, mc);
JavaPairRDD<MatrixIndexes, MatrixBlock> out = prepinput.mapPartitionsToPair(new CSVToBinaryBlockFunction(mc, sparse, hasHeader, delim, fill, fillValue));
// aggregate partial matrix blocks (w/ preferred number of output
// partitions as the data is likely smaller in binary block format,
// but also to bound the size of partitions for compressed inputs)
int parts = SparkUtils.getNumPreferredPartitions(mc, out);
return RDDAggregateUtils.mergeByKey(out, parts, false);
}
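A hypothetical call sketch (not from the project): since the converter only reads the values of the input pair RDD, a CSV line RDD can be wrapped with dummy LongWritable keys; the MatrixCharacteristics(rows, cols, blocksize) constructor is an assumption.
JavaRDD<String> csv = sc.parallelize(Arrays.asList("1.0,2.0,3.0", "4.0,5.0,6.0"));
// dummy keys; only the Text values are consumed by the converter
JavaPairRDD<LongWritable, Text> input = csv.mapToPair(s -> new Tuple2<LongWritable, Text>(new LongWritable(1L), new Text(s)));
// assumed constructor: rows, cols, blocksize
DataCharacteristics mc = new MatrixCharacteristics(2, 3, 1000);
JavaPairRDD<MatrixIndexes, MatrixBlock> blocks = RDDConverterUtils.csvToBinaryBlock(sc, input, mc, false, ",", false, 0);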
19
Source : FrameRDDConverterUtils.java
with Apache License 2.0
from tugraz-isds
// =====================================
// Text cell <--> Binary block
public static JavaPairRDD<Long, FrameBlock> textCellToBinaryBlock(JavaSparkContext sc, JavaPairRDD<LongWritable, Text> in, DataCharacteristics mcOut, ValueType[] schema) {
// convert input rdd to serializable long/frame block
JavaPairRDD<Long, Text> input = in.mapToPair(new LongWritableTextToLongTextFunction());
// do actual conversion
return textCellToBinaryBlockLongIndex(sc, input, mcOut, schema);
}
19
Source : FrameRDDConverterUtils.java
with Apache License 2.0
from tugraz-isds
// =====================================
// DataFrame <--> Binary block
public static JavaPairRDD<Long, FrameBlock> dataFrameToBinaryBlock(JavaSparkContext sc, Dataset<Row> df, DataCharacteristics mc, boolean containsID) {
return dataFrameToBinaryBlock(sc, df, mc, containsID, new Pair<String[], ValueType[]>());
}
19
Source : FrameRDDConverterUtils.java
with Apache License 2.0
from tugraz-isds
public static JavaPairRDD<Long, FrameBlock> textCellToBinaryBlockLongIndex(JavaSparkContext sc, JavaPairRDD<Long, Text> input, DataCharacteristics mc, ValueType[] schema) {
// prepare default schema if needed
if (schema == null || schema.length == 1) {
schema = UtilFunctions.nCopies((int) mc.getCols(), (schema != null) ? schema[0] : ValueType.STRING);
}
// convert textcell rdd to binary block rdd (w/ partial blocks)
JavaPairRDD<Long, FrameBlock> output = input.values().mapPartitionsToPair(new TextToBinaryBlockFunction(mc, schema));
// aggregate partial matrix blocks
return FrameRDDAggregateUtils.mergeByKey(output);
}
19
Source : FrameRDDConverterUtils.java
with Apache License 2.0
from tugraz-isds
public static JavaPairRDD<Long, FrameBlock> dataFrameToBinaryBlock(JavaSparkContext sc, Dataset<Row> df, DataCharacteristics mc, boolean containsID, Pair<String[], ValueType[]> out) {
// determine unknown dimensions if required
if (!mc.dimsKnown()) {
// nnz are irrelevant here
int colVect = getColVectFromDFSchema(df.schema(), containsID);
int off = (containsID ? 1 : 0);
long rlen = df.count();
long clen = df.columns().length - off + ((colVect >= 0) ? ((Vector) df.first().get(off + colVect)).size() - 1 : 0);
mc.set(rlen, clen, mc.getBlocksize(), -1);
}
// append or reuse row index column
JavaPairRDD<Row, Long> prepinput = containsID ? df.javaRDD().mapToPair(new DataFrameExtractIDFunction(df.schema().fieldIndex(RDDConverterUtils.DF_ID_COLUMN))) : // zip row index
df.javaRDD().zipWithIndex();
// convert data frame to frame schema (prepare once)
String[] colnames = new String[(int) mc.getCols()];
ValueType[] fschema = new ValueType[(int) mc.getCols()];
int colVect = convertDFSchemaToFrameSchema(df.schema(), colnames, fschema, containsID);
// make schema available
out.set(colnames, fschema);
// convert rdd to binary block rdd
return prepinput.mapPartitionsToPair(new DataFrameToBinaryBlockFunction(mc, colnames, fschema, containsID, colVect));
}
19
Source : FrameRDDConverterUtils.java
with Apache License 2.0
from tugraz-isds
public static JavaPairRDD<Long, FrameBlock> matrixBlockToBinaryBlockLongIndex(JavaSparkContext sc, JavaPairRDD<MatrixIndexes, MatrixBlock> input, DataCharacteristics dcIn) {
JavaPairRDD<MatrixIndexes, MatrixBlock> in = input;
DataCharacteristics mc = new MatrixCharacteristics(dcIn);
// reblock matrix blocks if required (multiple column blocks)
if (dcIn.getCols() > dcIn.getBlocksize()) {
// split matrix blocks into extended matrix blocks
in = in.flatMapToPair(new MatrixFrameReblockFunction(dcIn));
mc.setBlocksize(MatrixFrameReblockFunction.computeBlockSize(mc));
// shuffle matrix blocks (instead of frame blocks) in order to exploit
// sparse formats (for sparse or wide matrices) during shuffle
in = RDDAggregateUtils.mergeByKey(in, false);
}
// convert individual matrix blocks to frame blocks (w/o shuffle)
return in.mapToPair(new MatrixToFrameBlockFunction(mc));
}
19
Source : FrameRDDConverterUtils.java
with Apache License 2.0
from tugraz-isds
public static JavaPairRDD<Long, FrameBlock> csvToBinaryBlock(JavaSparkContext sc, JavaRDD<String> input, DataCharacteristics mcOut, ValueType[] schema, boolean hasHeader, String delim, boolean fill, double fillValue) {
// convert string rdd to serializable longwritable/text
JavaPairRDD<LongWritable, Text> prepinput = input.mapToPair(new StringToSerTextFunction());
// convert to binary block
return csvToBinaryBlock(sc, prepinput, mcOut, schema, hasHeader, delim, fill, fillValue);
}
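A hypothetical usage sketch (not from the project): passing null for the schema falls back to all-string columns, as handled in the pair-RDD overload of this converter; the MatrixCharacteristics(rows, cols, blocksize) constructor is an assumption.
JavaRDD<String> lines = sc.parallelize(Arrays.asList("id,name", "1,alice", "2,bob"));
// assumed constructor: rows, cols, blocksize; null schema defaults to all STRING columns
DataCharacteristics mc = new MatrixCharacteristics(2, 2, 1000);
JavaPairRDD<Long, FrameBlock> frame = FrameRDDConverterUtils.csvToBinaryBlock(sc, lines, mc, null, true, ",", false, 0);
System.out.println("frame blocks: " + frame.count());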
19
Source : FrameRDDConverterUtils.java
with Apache License 2.0
from tugraz-isds
/*
* Returns a JavaRDD<Row> built from a CSV data input file.
*/
public static JavaRDD<Row> csvToRowRDD(JavaSparkContext sc, String fnameIn, String delim, ValueType[] schema) {
// Load a text file and convert each line to a java rdd.
JavaRDD<String> dataRdd = sc.textFile(fnameIn);
return dataRdd.map(new RowGenerator(schema, delim));
}
19
Source : FrameRDDConverterUtils.java
with Apache License 2.0
from tugraz-isds
// =====================================
// Matrix block <--> Binary block
public static JavaPairRDD<LongWritable, FrameBlock> matrixBlockToBinaryBlock(JavaSparkContext sc, JavaPairRDD<MatrixIndexes, MatrixBlock> input, DataCharacteristics mcIn) {
// convert and map to serializable LongWritable/frame block
return matrixBlockToBinaryBlockLongIndex(sc, input, mcIn).mapToPair(new LongFrameToLongWritableFrameFunction());
}
19
Source : FrameRDDConverterUtils.java
with Apache License 2.0
from tugraz-isds
// =====================================
// CSV <--> Binary block
public static JavaPairRDD<Long, FrameBlock> csvToBinaryBlock(JavaSparkContext sc, JavaPairRDD<LongWritable, Text> input, DataCharacteristics mc, ValueType[] schema, boolean hasHeader, String delim, boolean fill, double fillValue) {
// determine unknown dimensions and sparsity if required
if (!mc.dimsKnown()) {
// nnz irrelevant here
JavaRDD<String> tmp = input.values().map(new TextToStringFunction());
String tmpStr = tmp.first();
boolean metaHeader = tmpStr.startsWith(TfUtils.TXMTD_MVPREFIX) || tmpStr.startsWith(TfUtils.TXMTD_NDPREFIX);
tmpStr = (metaHeader) ? tmpStr.substring(tmpStr.indexOf(delim) + 1) : tmpStr;
long rlen = tmp.count() - (hasHeader ? 1 : 0) - (metaHeader ? 2 : 0);
long clen = IOUtilFunctions.splitCSV(tmpStr, delim).length;
mc.set(rlen, clen, mc.getBlocksize(), -1);
}
// prepare csv w/ row indexes (sorted by filenames)
JavaPairRDD<Text, Long> prepinput = input.values().zipWithIndex();
// prepare default schema if needed
if (schema == null || schema.length == 1)
schema = UtilFunctions.nCopies((int) mc.getCols(), ValueType.STRING);
// convert csv rdd to binary block rdd (w/ partial blocks)
JavaPairRDD<Long, FrameBlock> out = prepinput.mapPartitionsToPair(new CSVToBinaryBlockFunction(mc, schema, hasHeader, delim));
return out;
}
19
Source : FrameRDDConverterUtils.java
with Apache License 2.0
from tugraz-isds
/*
* Returns a JavaRDD<Row> built from CSV data.
*/
public static JavaRDD<Row> csvToRowRDD(JavaSparkContext sc, JavaRDD<String> dataRdd, String delim, ValueType[] schema) {
// Convert each line to a java rdd.
return dataRdd.map(new RowGenerator(schema, delim));
}
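A hypothetical usage sketch (not from the project), reusing UtilFunctions.nCopies from the converters above to build an all-string schema:
ValueType[] schema = UtilFunctions.nCopies(3, ValueType.STRING);
JavaRDD<String> data = sc.parallelize(Arrays.asList("a,b,c", "d,e,f"));
JavaRDD<Row> rows = FrameRDDConverterUtils.csvToRowRDD(sc, data, ",", schema);
System.out.println(rows.collect());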
19
Source : MultiReturnParameterizedBuiltinSPInstruction.java
with Apache License 2.0
from tugraz-isds
private static MaxLongAccumulator registerMaxLongAccumulator(JavaSparkContext sc) {
MaxLongAccumulator acc = new MaxLongAccumulator(Long.MIN_VALUE);
sc.sc().register(acc, "max");
return acc;
}
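For comparison, a short illustrative sketch (not part of the project) of the same pattern with Spark's built-in LongAccumulator, obtained through the JavaSparkContext's underlying SparkContext and updated inside an action:
LongAccumulator rowsSeen = sc.sc().longAccumulator("rowsSeen");
sc.parallelize(Arrays.asList(1, 2, 3, 4)).foreach(x -> rowsSeen.add(1));
System.out.println("rows seen: " + rowsSeen.value());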
19
Source : RemoteDPParForSpark.java
with Apache License 2.0
from tugraz-isds
public static RemoteParForJobReturn runJob(long pfid, String itervar, String matrixvar, String program, HashMap<String, byte[]> clsMap, String resultFile, MatrixObject input, ExecutionContext ec, PartitionFormat dpf, OutputInfo oi, boolean tSparseCol, boolean enableCPCaching, int numReducers) {
String jobname = "ParFor-DPESP";
long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
SparkExecutionContext sec = (SparkExecutionContext) ec;
JavaSparkContext sc = sec.getSparkContext();
// prepare input parameters
MatrixObject mo = sec.getMatrixObject(matrixvar);
DataCharacteristics mc = mo.getDataCharacteristics();
// initialize accumulators for tasks/iterations, and inputs
JavaPairRDD<MatrixIndexes, MatrixBlock> in = sec.getBinaryMatrixBlockRDDHandleForVariable(matrixvar);
LongAccumulator aTasks = sc.sc().longAccumulator("tasks");
LongAccumulator aIters = sc.sc().longAccumulator("iterations");
// compute number of reducers (to avoid OOMs and reduce memory pressure)
int numParts = SparkUtils.getNumPreferredPartitions(mc, in);
int numReducers2 = Math.max(numReducers, Math.min(numParts, (int) dpf.getNumParts(mc)));
// core parfor datapartition-execute (w/ or w/o shuffle, depending on data characteristics)
RemoteDPParForSparkWorker efun = new RemoteDPParForSparkWorker(program, clsMap, matrixvar, itervar, enableCPCaching, mc, tSparseCol, dpf, oi, aTasks, aIters);
JavaPairRDD<Long, Writable> tmp = getPartitionedInput(sec, matrixvar, oi, dpf);
List<Tuple2<Long, String>> out = (requiresGrouping(dpf, mo) ? tmp.groupByKey(numReducers2) : tmp.map(new PseudoGrouping())).mapPartitionsToPair(// execute parfor tasks, incl cleanup
efun).collect();
// de-serialize results
LocalVariableMap[] results = RemoteParForUtils.getResults(out, LOG);
// get accumulator value
int numTasks = aTasks.value().intValue();
// get accumulator value
int numIters = aIters.value().intValue();
// create output symbol table entries
RemoteParForJobReturn ret = new RemoteParForJobReturn(true, numTasks, numIters, results);
// maintain statistics
Statistics.incrementNoOfCompiledSPInst();
Statistics.incrementNoOfExecutedSPInst();
if (DMLScript.STATISTICS) {
Statistics.maintainCPHeavyHitters(jobname, System.nanoTime() - t0);
}
return ret;
}
19
Source : VectorOpsMain.java
with Apache License 2.0
from tudorv91
public class VectorOpsMain {
private static JavaSparkContext jscSingleton;
private static String nativePath = null;
private static String appName = "vectorOps";
private static final boolean debug = true;
private static void initSparkJNI() {
nativePath = Paths.get("sparkjni-examples/src/main/cpp/examples/vectorOps").normalize().toAbsolutePath().toString();
appName = "vectorOps";
String sparkjniClasspath = FileSystems.getDefault().getPath("core/target/classes").toAbsolutePath().normalize().toString();
String examplesClasspath = FileSystems.getDefault().getPath("sparkjni-examples/target/classes").toAbsolutePath().normalize().toString();
SparkJni sparkJni = new SparkJniBuilder().nativePath(nativePath).appName(appName).build();
sparkJni.setDeployMode(new DeployMode(JUST_BUILD)).addToClasspath(sparkjniClasspath, examplesClasspath);
sparkJni.registerContainer(VectorBean.class).registerJniFunction(VectorMulJni.class).registerJniFunction(VectorAddJni.class);
sparkJni.deploy();
}
private static JavaSparkContext getSparkContext() {
if (jscSingleton == null) {
SparkConf sparkConf = new SparkConf().setAppName(appName);
sparkConf.setMaster("local[4]");
jscSingleton = new JavaSparkContext(sparkConf);
}
return jscSingleton;
}
private static ArrayList<VectorBean> generateVectors(int noVectors, int vectorSize) {
ArrayList<VectorBean> vectors = new ArrayList<>();
for (int i = 0; i < noVectors; i++) {
int[] data = new int[vectorSize];
if (debug)
System.out.println(String.format("Vector %d:", i));
for (int idx = 0; idx < vectorSize; idx++) {
data[idx] = (int) (Math.random() * 1000);
if (debug)
System.out.println(String.format("idx %d: %d", idx, data[idx]));
}
vectors.add(new VectorBean(data));
}
return vectors;
}
public static void main(String[] args) {
initSparkJNI();
String libPath = String.format(CppSyntax.NATIVE_LIB_PATH, nativePath, appName);
JavaRDD<VectorBean> vectorsRdd = getSparkContext().parallelize(generateVectors(2, 4));
JavaRDD<VectorBean> mulResults = vectorsRdd.map(new VectorMulJni(libPath, "mapVectorMul"));
VectorBean results = mulResults.reduce(new VectorAddJni(libPath, "reduceVectorAdd"));
debugRes(results);
}
private static void debugRes(VectorBean vector) {
if (debug) {
System.out.println("Result:");
for (int i = 0; i < vector.data.length; i++) System.out.println(vector.data[i]);
}
}
}
19
Source : ExampleUtils.java
with Apache License 2.0
from tudorv91
public class ExampleUtils {
private static JavaSparkContext jscSingleton;
public static ArrayList<VectorBean> generateVectors(int noVectors, int vectorSize, boolean debug) {
ArrayList<VectorBean> vectors = new ArrayList<>();
for (int i = 0; i < noVectors; i++) {
int[] data = new int[vectorSize];
if (debug)
System.out.println(String.format("Vector %d:", i));
for (int idx = 0; idx < vectorSize; idx++) {
data[idx] = (int) (Math.random() * 1000);
if (debug)
System.out.println(String.format("idx %d: %d", idx, data[idx]));
}
vectors.add(new VectorBean(data));
}
return vectors;
}
public static JavaSparkContext getSparkContext(String appName) {
if (jscSingleton == null) {
SparkConf sparkConf = new SparkConf().setAppName(appName);
sparkConf.setMaster("local[*]");
sparkConf.set("spark.driver.maxResultSize", "16g");
jscSingleton = new JavaSparkContext(sparkConf);
}
return jscSingleton;
}
public static void initSparkJNI(String appName, String nativePath) {
String sparkjniClasspath = FileSystems.getDefault().getPath("core/target/classes").toAbsolutePath().normalize().toString();
String examplesClasspath = FileSystems.getDefault().getPath("sparkjni-examples/target/classes").toAbsolutePath().normalize().toString();
SparkJni sparkJni = new SparkJniBuilder().nativePath(nativePath).appName(appName).build();
sparkJni.setDeployMode(new DeployMode(JUST_BUILD)).addToClasspath(sparkjniClasspath, examplesClasspath);
sparkJni.registerContainer(VectorBean.class).registerJniFunction(VectorMulJni.class).registerJniFunction(VectorAddJni.class);
sparkJni.deploy();
}
}
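A hypothetical driver using the two helpers above (the class and method names come from this example; the snippet itself is not part of the project):
JavaSparkContext jsc = ExampleUtils.getSparkContext("vectorOps");
JavaRDD<VectorBean> vectors = jsc.parallelize(ExampleUtils.generateVectors(2, 4, true));
System.out.println("vectors: " + vectors.count());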
19
Source : GeneratorVectorOpsMain.java
with Apache License 2.0
from tudorv91
public class GeneratorVectorOpsMain {
private static JavaSparkContext jscSingleton;
private static String appName = "vectorOps";
private static boolean debug = true;
public static JavaSparkContext getSparkContext() {
if (jscSingleton == null) {
SparkConf sparkConf = new SparkConf().setAppName(appName);
sparkConf.setMaster("local[4]");
jscSingleton = new JavaSparkContext(sparkConf);
}
return jscSingleton;
}
public static ArrayList<VectorBean> generateVectors(int noVectors, int vectorSize) {
ArrayList<VectorBean> vectors = new ArrayList<VectorBean>();
for (int i = 0; i < noVectors; i++) {
int[] data = new int[vectorSize];
if (debug)
System.out.println(String.format("Vector %d:", i));
for (int idx = 0; idx < vectorSize; idx++) {
data[idx] = (int) (Math.random() * 1000);
if (debug)
System.out.println(String.format("idx %d: %d", idx, data[idx]));
}
vectors.add(new VectorBean(data));
}
return vectors;
}
public static void main(String[] args) {
String nativePath = Paths.get("src/test/resources/vectorOps/src/main/resources/vectorOps").toAbsolutePath().toString();
String relativeLibPath = String.format(CppSyntax.NATIVE_LIB_PATH, nativePath, appName);
String absLibPath = new File(relativeLibPath).toPath().toAbsolutePath().toString();
JavaRDD<VectorBean> vectorsRdd = getSparkContext().parallelize(generateVectors(2, 4));
JavaRDD<VectorBean> mulResults = vectorsRdd.map(new VectorMulJni(absLibPath, "mapVectorMul"));
VectorBean results = mulResults.reduce(new VectorAddJni(absLibPath, "reduceVectorAdd"));
debugRes(results);
}
private static void debugRes(VectorBean vector) {
if (debug) {
System.out.println("Result:");
for (int i = 0; i < vector.data.length; i++) System.out.println(vector.data[i]);
}
}
}
19
Source : TestUtils.java
with Apache License 2.0
from tudorv91
public class TestUtils {
public static final String CLUSTER_CONF_LOCAL_4 = "local[4]";
public String defaultTestFolder = "resources/%s";
public File testDir;
public String fullPath;
public String appName;
private static JavaSparkContext jscSingleton = null;
public TestUtils(Class callerClass) {
appName = callerClass.getSimpleName();
defaultTestFolder = String.format(defaultTestFolder, appName + "_TEST");
initTestDir();
}
public String initTestDir() {
testDir = new File(defaultTestFolder);
if (testDir.exists())
cleanTestDir();
if (!testDir.mkdirs())
throw new RuntimeException(String.format("Failed to create testdir %s", testDir.getAbsolutePath()));
fullPath = testDir.getAbsolutePath();
return fullPath;
}
public void cleanTestDir() {
try {
FileUtils.deleteDirectory(testDir);
} catch (IOException e) {
e.printStackTrace();
}
}
public JavaSparkContext getSparkContext() {
if (jscSingleton == null) {
SparkConf sparkConf = new SparkConf().setAppName(appName);
sparkConf.setMaster(CLUSTER_CONF_LOCAL_4);
jscSingleton = new JavaSparkContext(sparkConf);
}
return jscSingleton;
}
public SparkJni getSparkJni(String classpath) {
return new SparkJniBuilder().appName(appName).nativePath(fullPath).classpath(classpath).build();
}
public String getLibPath() {
return JniUtils.generateDefaultLibPath(appName, fullPath);
}
public SparkJni getSparkJni() {
return getSparkJni(JniUtils.getClasspath());
}
}
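A hypothetical test fixture sketch built on the helper above; SomeKernelTest is a placeholder caller class, everything else uses only the methods shown in this example:
TestUtils testUtils = new TestUtils(SomeKernelTest.class); // SomeKernelTest is a placeholder
JavaSparkContext sc = testUtils.getSparkContext();
SparkJni sparkJni = testUtils.getSparkJni();
System.out.println("native lib path: " + testUtils.getLibPath());
testUtils.cleanTestDir();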
19
Source : SparkJni.java
with Apache License 2.0
from tudorv91
public class SparkJni {
private final MetadataHandler metadataHandler;
private final DeployTimesLogger deployTimesLogger;
private final JniLinkHandler jniLinkHandler;
private final MakefileGenerator makefileGenerator;
private final NativeFunctionPrototypesCollector nativeFunctionPrototypesCollector;
private static JniRootContainer jniRootContainer;
private static JavaSparkContext javaSparkContext;
private HashMap<String, String> functionCodeInjectorMap;
private DeployMode deployMode;
private boolean overWriteKernelFile = false;
@Inject
private SparkJni(@Nonnull MetadataHandler metadataHandler, @Nonnull DeployTimesLogger deployTimesLogger, @Nonnull Provider<JniLinkHandler> jniLinkHandlerProvider, @Nonnull MakefileGenerator makefileGenerator, @Nonnull NativeFunctionPrototypesCollector nativeFunctionPrototypesCollector) {
this.metadataHandler = metadataHandler;
this.deployTimesLogger = deployTimesLogger;
this.jniLinkHandler = jniLinkHandlerProvider.get();
this.makefileGenerator = makefileGenerator;
this.nativeFunctionPrototypesCollector = nativeFunctionPrototypesCollector;
// by default, follow the entire deploy process
deployMode = new DeployMode(DeployMode.DeployModes.FULL_GENERATE_AND_BUILD);
}
@Builder.Factory
static SparkJni sparkJni(@Nonnull Optional<String> appName, @Nonnull String nativePath, @Nonnull Optional<String> jdkPath, @Nonnull Optional<String> classpath) {
final SparkJni sparkJniSingleton = injectSparkJni();
sparkJniSingleton.initVars(appName.isPresent() ? appName.get() : null, nativePath, jdkPath.isPresent() ? jdkPath.get() : null);
classpath.transform(new Function<String, Object>() {
@Nullable
@Override
public Object apply(@Nullable String s) {
sparkJniSingleton.addToClasspath(s);
return new Object();
}
});
return sparkJniSingleton;
}
private void initVars(String appName, String nativePath, String jdkPath) {
setAppName(appName);
setNativePath(nativePath);
setJdkPath(jdkPath);
}
public void deploy() {
deployTimesLogger.start = System.currentTimeMillis();
processCppContent();
loadNativeLib();
}
public void deployWithCodeInjections(HashMap<String, String> functionCodeInjectorMap) {
if (!functionCodeInjectorMap.isEmpty()) {
this.functionCodeInjectorMap = functionCodeInjectorMap;
}
deploy();
}
private void loadNativeLib() {
String libraryFullPath = JniUtils.generateDefaultLibPath(metadataHandler.getAppName(), metadataHandler.getNativePath());
if (javaSparkContext != null) {
javaSparkContext.addFile(libraryFullPath);
} else {
System.load(libraryFullPath);
}
deployTimesLogger.libLoadTime = System.currentTimeMillis() - deployTimesLogger.start;
}
private void processCppContent() {
checkNativePath();
jniLinkHandler.deployLink();
executeAndBenchmarkJavah();
generateAndCheckMakefile();
generateJniRootContainer();
generateKernelFiles();
build();
}
private void executeAndBenchmarkJavah() {
long startJavah = System.currentTimeMillis();
if (deployMode.doJavah) {
jniLinkHandler.javah(metadataHandler.getClasspath());
}
nativeFunctionPrototypesCollector.collectNativeFunctionPrototypes();
deployTimesLogger.javahTime = System.currentTimeMillis() - startJavah;
}
public void addToClasspath(String... classpath) {
for (String cPath : classpath) {
metadataHandler.addToClasspath(cPath);
}
}
private void generateJniRootContainer() {
jniRootContainer = ImmutableJniRootContainerProvider.builder().build().buildJniRootContainer(metadataHandler.getNativePath(), metadataHandler.getAppName());
}
private void generateKernelFiles() {
KernelFileWrapperHeader kernelFileWrapperHeader = getKernelFileWrapperHeader();
if (!deployMode.doForceOverwriteKernelWrappers) {
return;
}
if (!kernelFileWrapperHeader.writeKernelWrapperFile()) {
throw new HardSparkJniException(Messages.ERR_KERNEL_FILE_GENERATION_FAILED);
}
if (deployMode.doForceOverwriteKernelWrappers) {
KernelFile kernelFile = kernelFileWrapperHeader.getKernelFile();
if (functionCodeInjectorMap != null && !functionCodeInjectorMap.isEmpty()) {
injectFunctionCodeBody(kernelFile.userNativeFunctions());
}
kernelFile.writeKernelFile(overWriteKernelFile);
}
}
private void injectFunctionCodeBody(List<UserNativeFunction> userNativeFunctions) {
for (UserNativeFunction userNativeFunction : userNativeFunctions) {
String functionName = userNativeFunction.functionSignatureMapper().functionNameMapper().cppName();
String codeBody = functionCodeInjectorMap.get(functionName);
if (codeBody == null)
continue;
userNativeFunction.setFunctionBodyCodeInsertion(Optional.of(codeBody));
}
}
private void generateAndCheckMakefile() {
if (deployMode.doGenerateMakefile)
if (!makefileGenerator.generateMakefile(deployMode)) {
System.err.println(Messages.MAKEFILE_GENERATION_FAILED_ERROR);
System.exit(3);
}
}
private void build() {
String nativePath = metadataHandler.getNativePath();
deployTimesLogger.genTime = System.currentTimeMillis() - deployTimesLogger.start - deployTimesLogger.javahTime;
deployTimesLogger.start = System.currentTimeMillis();
if (deployMode.doBuild) {
JniUtils.runProcess(String.format(CppSyntax.EXEC_MAKE_CLEAN, nativePath));
JniUtils.runProcess(String.format(CppSyntax.EXEC_MAKE, nativePath));
}
deployTimesLogger.buildTime = System.currentTimeMillis() - deployTimesLogger.start;
}
private void checkNativePath() {
if (metadataHandler.getNativePath() == null) {
System.err.println(Messages.NATIVE_PATH_NOT_SET);
System.exit(1);
}
File nativePathDir = new File(metadataHandler.getNativePath());
if (!nativePathDir.exists() || !nativePathDir.isDirectory()) {
System.err.println(Messages.NATIVE_PATH_ERROR + ":" + nativePathDir.getAbsolutePath());
System.exit(2);
}
}
public void registerClassifier(SparkJniClassifier sparkJniClassifier) {
for (Class functionClass : sparkJniClassifier.getJniFunctionClasses()) {
registerJniFunction(functionClass);
}
for (Class beanClass : sparkJniClassifier.getBeanClasses()) {
registerContainer(beanClass);
}
}
/**
* Set the user defines pragma for the build stage flags.
*
* @param userDefines
*/
@SuppressWarnings("unused")
public SparkJni setUserDefines(String userDefines) {
metadataHandler.setUserDefines(userDefines);
return this;
}
/**
* Set the personalized user directories.
*
* @param userLibraryDirs
*/
@SuppressWarnings("unused")
public SparkJni setUserLibraryDirs(String userLibraryDirs) {
metadataHandler.setUserLibraryDirs(userLibraryDirs);
return this;
}
public SparkJni setSparkContext(JavaSparkContext javaSparkContext) {
SparkJni.javaSparkContext = javaSparkContext;
return this;
}
/**
* Set the personalized user include directories.
*
* @param userIncludeDirs
*/
public SparkJni setUserIncludeDirs(String userIncludeDirs) {
metadataHandler.setUserIncludeDirs(userIncludeDirs);
return this;
}
@SuppressWarnings("unused")
public SparkJni setUserLibraries(String userLibraries) {
metadataHandler.setUserLibraries(userLibraries);
return this;
}
public SparkJni setJdkPath(String jdkPath) {
metadataHandler.setJdkPath(jdkPath);
return this;
}
private SparkJni setNativePath(String nativePath) {
metadataHandler.setNativePath(nativePath);
return this;
}
private SparkJni setAppName(String appName) {
metadataHandler.setAppName(appName);
return this;
}
/**
* Register the user-defined jni function.
*
* @param jniFunctionClass
*/
public SparkJni registerJniFunction(Class jniFunctionClass) {
jniLinkHandler.registerJniFunction(jniFunctionClass);
return this;
}
/**
* Register the user-defined JavaBean container.
*
* @param beanClass
*/
public SparkJni registerContainer(Class beanClass) {
jniLinkHandler.registerBean(beanClass);
return this;
}
public JniLinkHandler getJniHandler() {
return jniLinkHandler;
}
private KernelFileWrapperHeader getKernelFileWrapperHeader() {
return new KernelFileWrapperHeader(jniLinkHandler.getContainerHeaderFiles(), jniRootContainer);
}
public JniRootContainer getJniRootContainer() {
return jniRootContainer;
}
public SparkJni setDeployMode(DeployMode deployMode) {
this.deployMode = deployMode;
return this;
}
@SuppressWarnings("unused")
public DeployTimesLogger getDeployTimesLogger() {
return deployTimesLogger;
}
public DeployMode getDeployMode() {
return deployMode;
}
ClassLoader getClassloader() {
return metadataHandler.getClassloader();
}
public void setClassloader(ClassLoader classloader) {
metadataHandler.setClassloader(classloader);
}
public void setOverwriteKernelFile(boolean overwriteKernelFile) {
this.overWriteKernelFile = overwriteKernelFile;
}
}
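A hypothetical end-to-end setup sketch using only the builder and registration calls shown above; the native path is a placeholder directory and the registered container/function classes are those from the vectorOps examples:
SparkConf conf = new SparkConf().setAppName("vectorOps").setMaster("local[*]");
JavaSparkContext jsc = new JavaSparkContext(conf);
SparkJni sparkJni = new SparkJniBuilder().appName("vectorOps").nativePath("/tmp/sparkjni/vectorOps").build(); // placeholder native source directory
sparkJni.setSparkContext(jsc).setDeployMode(new DeployMode(DeployMode.DeployModes.FULL_GENERATE_AND_BUILD)).registerContainer(VectorBean.class).registerJniFunction(VectorMulJni.class).registerJniFunction(VectorAddJni.class);
sparkJni.deploy();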
19
Source : SparkJni.java
with Apache License 2.0
from tudorv91
public SparkJni setSparkContext(JavaSparkContext javaSparkContext) {
SparkJni.javaSparkContext = javaSparkContext;
return this;
}
19
Source : TotalSumExample.java
with Apache License 2.0
from SpiRITlab
/**
* This method performs the total sum operation on a plaintext vector and prints out the result
* @param jsc spark context which allows the communication with worker nodes
* @param slices the number of times a task is split up
*/
public static void test_basic_total_sum(JavaSparkContext jsc, int slices) {
System.out.println("test_basic_total_sum");
// distribute a local Scala collection (lists in this case) to form 2 RDDs
JavaRDD<Integer> values_RDD = jsc.parallelize(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100), slices);
// sum up the values and display
System.out.println("values_RDD:" + values_RDD.reduce((x, y) -> {
// we need to load the shared library and init a copy of SparkFHE on the executor
SparkFHEPlugin.setup();
return (x + y);
}));
}
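A hypothetical driver for this example (not from the project), showing how the JavaSparkContext passed in above might be created:
SparkConf conf = new SparkConf().setAppName("TotalSumExample").setMaster("local[*]");
JavaSparkContext jsc = new JavaSparkContext(conf);
test_basic_total_sum(jsc, 4); // 4 slices
jsc.close();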
19
Source : TotalSumExample.java
with Apache License 2.0
from SpiRITlab
/**
* This method performs the total sum operation on a plaintext vector and prints out the result
* @param jsc spark context which allows the communication with worker nodes
* @param slices the number of times a task is split up
*/
public static void test_basic_total_sum(JavaSparkContext jsc, int slices) {
System.out.println("test_basic_total_sum");
// distribute a local Scala collection (lists in this case) to form 2 RDDs
JavaRDD<Integer> values_RDD = jsc.parallelize(Arrays.asList(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), slices);
// sum up the values and display
System.out.println("values_RDD:" + values_RDD.reduce((x, y) -> {
// we need to load the shared library and init a copy of SparkFHE on the executor
SparkFHEPlugin.setup();
return (x + y);
}));
}
19
Source : CustomReportServiceTest.java
with Apache License 2.0
from sbl-sdsc
/**
* @author Peter Rose
*/
public class CustomReportServiceTest {
private JavaSparkContext sc;
@Before
public void setUp() throws Exception {
SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(AdvancedQueryTest.class.getSimpleName());
sc = new JavaSparkContext(conf);
}
@After
public void tearDown() throws Exception {
sc.close();
}
@Test
public void test1() throws IOException {
Dataset<Row> ds = CustomReportService.getDataset("pmc", "pubmedId", "depositionDate");
assertEquals("StructType(StructField(structureId,StringType,true), StructField(pmc,StringType,true), StructField(pubmedId,IntegerType,true), StructField(depositionDate,TimestampType,true))", ds.schema().toString());
assertTrue(ds.count() > 130101);
}
@Test
public void test2() throws IOException {
Dataset<Row> ds = CustomReportService.getDataset("ecNo");
assertEquals("StructType(StructField(structureChainId,StringType,true), StructField(structureId,StringType,true), StructField(chainId,StringType,true), StructField(ecNo,StringType,true))", ds.schema().toString());
assertTrue(ds.count() > 130101);
}
}
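A hypothetical interactive usage sketch of the service under test (Spark context setup as in setUp above; the column names are the ones used in test1):
Dataset<Row> ds = CustomReportService.getDataset("pmc", "pubmedId", "depositionDate");
ds.printSchema();
ds.show(5, false);
System.out.println("entries: " + ds.count());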
19
Source : WildTypeTest.java
with Apache License 2.0
from sbl-sdsc
public class WildTypeTest {
private static JavaSparkContext sc;
private static JavaPairRDD<String, StructureDataInterface> pdb;
@Before
public void setUp() throws Exception {
SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(WildTypeTest.class.getSimpleName());
sc = new JavaSparkContext(conf);
// 1PEN wildtype query 100 matches: 1PEN:1
// 1OCZ two entities wildtype query 100 matches: 1OCZ:1, 1OCZ:2
// 2ONX structure result for author query
List<String> pdbIds = Arrays.asList("1PEN", "1OCZ", "2ONX");
pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc);
}
@After
public void tearDown() throws Exception {
sc.close();
}
// TODO the wildtype webservice of RCSB PDB is currently broken
// @Test
public void test1() throws IOException {
pdb = pdb.filter(new WildType(true, 100));
List<String> results = pdb.keys().collect();
assertTrue(results.contains("1PEN"));
assertTrue(results.contains("1OCZ"));
assertFalse(results.contains("2ONX"));
}
}
19
Source : PiscesTest.java
with Apache License 2.0
from sbl-sdsc
public class PiscesTest {
private JavaSparkContext sc;
private JavaPairRDD<String, StructureDataInterface> pdb;
@Before
public void setUp() throws Exception {
SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(PolymerCompositionTest.class.getSimpleName());
sc = new JavaSparkContext(conf);
// "4R4X.A" and "5X42.B" should preplaced filter
List<String> pdbIds = Arrays.asList("5X42", "4R4X", "2ONX", "1JLP");
pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc);
}
@After
public void tearDown() throws Exception {
sc.close();
}
@Test
public void test1() throws IOException {
pdb = pdb.filter(new Pisces(20, 2.0));
List<String> results = pdb.keys().collect();
assertTrue(results.contains("5X42"));
assertTrue(results.contains("4R4X"));
assertFalse(results.contains("2ONX"));
assertFalse(results.contains("1JLP"));
}
@Test
public void test2() throws IOException {
pdb = pdb.flatMapToPair(new StructureToPolymerChains());
pdb = pdb.filter(new Pisces(20, 2.0));
List<String> results = pdb.keys().collect();
assertTrue(results.contains("5X42.B"));
assertTrue(results.contains("4R4X.A"));
assertFalse(results.contains("5X42.A"));
assertFalse(results.contains("2ONX.A"));
assertFalse(results.contains("1JLP.A"));
}
}
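The same pipeline as a hypothetical standalone driver (not from the project), combining the reader, chain splitter, and filter exercised by this test:
SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("PiscesFilterExample");
JavaSparkContext sc = new JavaSparkContext(conf);
List<String> pdbIds = Arrays.asList("5X42", "4R4X", "2ONX", "1JLP");
JavaPairRDD<String, StructureDataInterface> pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc);
// split entries into polymer chains, then apply the Pisces filter with the same parameters as the test above
pdb = pdb.flatMapToPair(new StructureToPolymerChains()).filter(new Pisces(20, 2.0));
System.out.println(pdb.keys().collect());
sc.close();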
19
Source : PdbjMineSearchTest.java
with Apache License 2.0
from sbl-sdsc
public class PdbjMineSearchTest {
private JavaSparkContext sc;
private JavaPairRDD<String, StructureDataInterface> pdb;
@Before
public void setUp() throws Exception {
SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(PdbjMineSearchTest.class.getSimpleName());
sc = new JavaSparkContext(conf);
List<String> pdbIds = Arrays.asList("1FIN", "5JDE", "5CU4", "5L6W", "5UFU", "5IHB");
pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc);
}
@After
public void tearDown() throws Exception {
sc.close();
}
@Test
public /**
* This test runs a SQL query and compares the results at the PDB entry
* level
*
* @throws IOException
*/
void test1() throws IOException {
String sql = "SELECT pdbid, chain FROM sifts.pdb_chain_enzyme WHERE ec_number = '2.7.11.1'";
pdb = pdb.filter(new PdbjMineSearch(sql));
List<String> matches = pdb.keys().collect();
assertTrue(matches.contains("5JDE"));
assertTrue(matches.contains("5CU4"));
assertTrue(matches.contains("5L6W"));
assertTrue(matches.contains("5UFU"));
assertFalse(matches.contains("5IHB"));
assertFalse(matches.contains("1FIN"));
}
@Test
public /**
* This test runs a chain level SQL query and compares chain level results
*
* @throws IOException
*/
void test2() throws IOException {
String sql = "SELECT e.pdbid, e.chain FROM sifts.pdb_chain_enzyme AS e WHERE e.ec_number = '2.7.11.1'";
pdb = pdb.flatMapToPair(new StructureToPolymerChains());
pdb = pdb.filter(new PdbjMineSearch(sql));
List<String> matches = pdb.keys().collect();
assertTrue(matches.contains("5JDE.A"));
assertTrue(matches.contains("5JDE.B"));
assertTrue(matches.contains("5CU4.A"));
// this chain is EC 2.7.11.1
assertTrue(matches.contains("5L6W.L"));
// this chain is not EC 2.7.11.1
assertFalse(matches.contains("5L6W.C"));
assertTrue(matches.contains("5UFU.A"));
assertFalse(matches.contains("5UFU.B"));
assertFalse(matches.contains("5UFU.C"));
assertFalse(matches.contains("5IHB.A"));
assertFalse(matches.contains("5IHB.B"));
assertFalse(matches.contains("5IHB.C"));
assertFalse(matches.contains("5IHB.D"));
}
}
19
Source : CustomReportQueryTest.java
with Apache License 2.0
from sbl-sdsc
public class CustomReportQueryTest {
private JavaSparkContext sc;
private JavaPairRDD<String, StructureDataInterface> pdb;
@Before
public void setUp() throws Exception {
SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(CustomReportQueryTest.class.getSimpleName());
sc = new JavaSparkContext(conf);
List<String> pdbIds = Arrays.asList("5JDE", "5CU4", "5L6W", "5UFU", "5IHB");
pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc);
}
@After
public void tearDown() throws Exception {
sc.close();
}
@Test
public /**
* This test runs a chain level query and compares the results at the PDB entry level
* @throws IOException
*/
void test1() throws IOException {
String whereClause = "WHERE ecNo='2.7.11.1' AND source='Homo sapiens'";
pdb = pdb.filter(new CustomReportQuery(whereClause, "ecNo", "source"));
List<String> matches = pdb.keys().collect();
assertTrue(matches.contains("5JDE"));
assertTrue(matches.contains("5CU4"));
assertTrue(matches.contains("5L6W"));
assertFalse(matches.contains("5UFU"));
assertFalse(matches.contains("5IHB"));
}
@Test
public /**
* This test runs a chain level query and compares chain level results
* @throws IOException
*/
void test2() throws IOException {
pdb = pdb.flatMapToPair(new StructureToPolymerChains());
String whereClause = "WHERE ecNo='2.7.11.1' AND source='Homo sapiens'";
pdb = pdb.filter(new CustomReportQuery(whereClause, "ecNo", "source"));
List<String> matches = pdb.keys().collect();
assertTrue(matches.contains("5JDE.A"));
assertTrue(matches.contains("5JDE.B"));
assertTrue(matches.contains("5CU4.A"));
// this chain is EC 2.7.11.1
assertTrue(matches.contains("5L6W.L"));
// this chain is not EC 2.7.11.1
assertFalse(matches.contains("5L6W.C"));
assertFalse(matches.contains("5UFU.A"));
assertFalse(matches.contains("5UFU.B"));
assertFalse(matches.contains("5UFU.C"));
assertFalse(matches.contains("5IHB.A"));
assertFalse(matches.contains("5IHB.B"));
assertFalse(matches.contains("5IHB.C"));
assertFalse(matches.contains("5IHB.D"));
}
}
19
Source : ChemicalStructureQueryTest.java
with Apache License 2.0
from sbl-sdsc
public class ChemicalStructureQueryTest {
private JavaSparkContext sc;
private JavaPairRDD<String, StructureDataInterface> pdb;
@Before
public void setUp() throws Exception {
SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(ChemicalStructureQueryTest.class.getSimpleName());
sc = new JavaSparkContext(conf);
List<String> pdbIds = Arrays.asList("1HYA", "2ONX", "1F27", "4QMC", "2RTL");
pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc);
}
@After
public void tearDown() throws Exception {
sc.close();
}
@Test
public void test1() throws IOException {
pdb = pdb.filter(new ChemicalStructureQuery("CC(=O)NC1C(O)OC(CO)C(O)C1O"));
List<String> results = pdb.keys().collect();
assertTrue(results.contains("1HYA"));
assertFalse(results.contains("2ONX"));
}
@Test
public void test2() throws IOException {
pdb = pdb.filter(new ChemicalStructureQuery("OC(=O)CCCC[C@@H]1SC[C@@H]2NC(=O)N[C@H]12", ChemicalStructureQuery.EXACT, 0));
List<String> results = pdb.keys().collect();
assertFalse(results.contains("1HYA"));
assertFalse(results.contains("2ONX"));
assertTrue(results.contains("1F27"));
assertFalse(results.contains("2RTL"));
assertFalse(results.contains("4QMC"));
}
@Test
public void test3() throws IOException {
pdb = pdb.filter(new ChemicalStructureQuery("OC(=O)CCCC[C@@H]1SC[C@@H]2NC(=O)N[C@H]12", ChemicalStructureQuery.SUBSTRUCTURE, 0));
List<String> results = pdb.keys().collect();
assertFalse(results.contains("1HYA"));
assertFalse(results.contains("2ONX"));
assertTrue(results.contains("1F27"));
assertFalse(results.contains("2RTL"));
assertTrue(results.contains("4QMC"));
}
@Test
public void test4() throws IOException {
pdb = pdb.filter(new ChemicalStructureQuery("OC(=O)CCCC[C@@H]1SC[C@@H]2NC(=O)N[C@H]12", ChemicalStructureQuery.SIMILAR, 70));
List<String> results = pdb.keys().collect();
assertFalse(results.contains("1HYA"));
assertFalse(results.contains("2ONX"));
assertTrue(results.contains("1F27"));
assertTrue(results.contains("2RTL"));
assertTrue(results.contains("4QMC"));
}
@Test
public void test5() throws IOException {
pdb = pdb.filter(new ChemicalStructureQuery("OC(=O)CCCC[C@H]1[C@H]2NC(=O)N[C@H]2C[S@@]1=O", ChemicalStructureQuery.SUPERSTRUCTURE, 0));
List<String> results = pdb.keys().collect();
assertFalse(results.contains("1HYA"));
assertFalse(results.contains("2ONX"));
assertTrue(results.contains("1F27"));
assertFalse(results.contains("2RTL"));
assertTrue(results.contains("4QMC"));
}
}
19
Source : BlastClustersTest.java
with Apache License 2.0
from sbl-sdsc
public class BlastClustersTest {
private JavaSparkContext sc;
private JavaPairRDD<String, StructureDataInterface> pdb;
@Before
public void setUp() throws Exception {
SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(BlastClustersTest.class.getSimpleName());
sc = new JavaSparkContext(conf);
List<String> pdbIds = Arrays.asList("1O06", "2ONX");
pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc);
}
@After
public void tearDown() throws Exception {
sc.close();
}
@Test
public /**
* This test runs a pdb level query and compares the results at the PDB entry level
* @throws IOException
*/
void test1() throws IOException, StructureException {
pdb = pdb.filter(new BlastClusters(40));
List<String> matches = pdb.keys().collect();
assertTrue(matches.contains("1O06"));
assertFalse(matches.contains("1O06.A"));
assertFalse(matches.contains("2ONX"));
}
@Test
public /**
* This test runs a chain level query and compares the results at the PDB entry level
* @throws IOException
*/
void test2() throws IOException, StructureException {
pdb = pdb.filter(new BlastClusters(40));
pdb = pdb.flatMapToPair(new StructureToPolymerChains());
List<String> matches = pdb.keys().collect();
assertFalse(matches.contains("1O06"));
assertTrue(matches.contains("1O06.A"));
assertFalse(matches.contains("2ONX.A"));
}
}
19
Source : AdvancedQueryTest.java
with Apache License 2.0
from sbl-sdsc
public class AdvancedQueryTest {
private static JavaSparkContext sc;
private static JavaPairRDD<String, StructureDataInterface> pdb;
@Before
public void setUp() throws Exception {
SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(AdvancedQueryTest.class.getSimpleName());
sc = new JavaSparkContext(conf);
// 1PEN wildtype query 100 matches: 1PEN:1
// 1OCZ two entities wildtype query 100 matches: 1OCZ:1, 1OCZ:2
// 2ONX structure result for author query
// 5L6W two chains: chain L is EC 2.7.11.1, chain C is not EC 2.7.11.1
// 5KHU many chains, chain Q is EC 2.7.11.1
// 1F3M entity 1: chains A,B, entity 2: chains B,C, all chains are EC 2.7.11.1
List<String> pdbIds = Arrays.asList("1PEN", "1OCZ", "2ONX", "5L6W", "5KHU", "1F3M");
pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc);
}
@After
public void tearDown() throws Exception {
sc.close();
}
// TODO the wildtype query web service is currently broken
// @Test
/**
* This test runs a chain level query and compares the results at the PDB entry level
* @throws IOException
*/
public void test1() throws IOException {
String query = "<orgPdbQuery>" + "<queryType>org.pdb.query.simple.WildTypeProteinQuery</queryType>" + "<includeExprTag>Y</includeExprTag>" + "<percentSeqAlignment>100</percentSeqAlignment>" + "</orgPdbQuery>";
pdb = pdb.filter(new AdvancedQuery(query));
List<String> matches = pdb.keys().collect();
assertTrue(matches.contains("1PEN"));
assertTrue(matches.contains("1OCZ"));
assertFalse(matches.contains("2ONX"));
assertFalse(matches.contains("5L6W"));
}
@Test
public /**
* This test runs a chain level query and compares the results at the PDB entry level
* @throws IOException
*/
void test2() throws IOException {
String query = "<orgPdbQuery>" + "<queryType>org.pdb.query.simple.AdvancedAuthorQuery</queryType>" + "<searchType>All Authors</searchType><audit_author.name>Eisenberg</audit_author.name>" + "<exactMatch>false</exactMatch>" + "</orgPdbQuery>";
pdb = pdb.filter(new AdvancedQuery(query));
List<String> matches = pdb.keys().collect();
assertFalse(matches.contains("1PEN"));
assertFalse(matches.contains("1OCZ"));
assertTrue(matches.contains("2ONX"));
assertFalse(matches.contains("5L6W"));
}
@Test
public /**
* This test runs a chain level query and compares the results at the PDB entry level
* @throws IOException
*/
void test3() throws IOException {
String query = "<orgPdbQuery>" + "<queryType>org.pdb.query.simple.EnzymeClassificationQuery</queryType>" + "<Enzyme_Classification>2.7.11.1</Enzyme_Classification>" + "</orgPdbQuery>";
pdb = pdb.filter(new AdvancedQuery(query));
List<String> matches = pdb.keys().collect();
assertFalse(matches.contains("1PEN"));
assertFalse(matches.contains("1OCZ"));
assertFalse(matches.contains("2ONX"));
assertTrue(matches.contains("5L6W"));
assertTrue(matches.contains("5KHU"));
}
@Test
public /**
* This test runs a chain level query and compares the results at the PDB chain level
* @throws IOException
*/
void test4() throws IOException {
String query = "<orgPdbQuery>" + "<queryType>org.pdb.query.simple.EnzymeClassificationQuery</queryType>" + "<Enzyme_Classification>2.7.11.1</Enzyme_Classification>" + "</orgPdbQuery>";
pdb = pdb.flatMapToPair(new StructureToPolymerChains());
pdb = pdb.filter(new AdvancedQuery(query));
List<String> matches = pdb.keys().collect();
assertFalse(matches.contains("1PEN.A"));
assertFalse(matches.contains("1OCZ.A"));
assertFalse(matches.contains("2ONX.A"));
// only this chain is EC 2.7.11.1
assertTrue(matches.contains("5L6W.L"));
assertFalse(matches.contains("5L6W.C"));
assertFalse(matches.contains("5KHU.A"));
assertFalse(matches.contains("5KHU.B"));
// only this chain is EC 2.7.11.1
assertTrue(matches.contains("5KHU.Q"));
// 1F3M all chains are EC 2.7.11.1
assertTrue(matches.contains("1F3M.A"));
assertTrue(matches.contains("1F3M.B"));
assertTrue(matches.contains("1F3M.C"));
assertTrue(matches.contains("1F3M.D"));
}
}
19
Source : CoordinationGeometryTest.java
with Apache License 2.0
from sbl-sdsc
public class CoordinationGeometryTest {
private JavaSparkContext sc;
private JavaPairRDD<String, StructureDataInterface> pdb;
@Before
public void setUp() throws Exception {
SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(ColumnarStructureTest.class.getSimpleName());
sc = new JavaSparkContext(conf);
List<String> pdbIds = Arrays.asList("5Y20");
pdb = MmtfReader.downloadFullMmtfFiles(pdbIds, sc);
}
@After
public void tearDown() throws Exception {
sc.close();
}
@Test
public void test() {
StructureDataInterface structure = pdb.values().first();
ColumnarStructure cs = new ColumnarStructure(structure, true);
// ZN A.101.ZN
Point3d center = getCoords(cs, 459);
Point3d[] neighbors = new Point3d[6];
// CYS A.7.SG
neighbors[0] = getCoords(cs, 28);
// CYS A.10.SG
neighbors[1] = getCoords(cs, 44);
// HIS A.31.ND1
neighbors[2] = getCoords(cs, 223);
// CYS A.34.SG
neighbors[3] = getCoords(cs, 245);
// CYS A.10.N
neighbors[4] = getCoords(cs, 45);
// HIS A.31.O
neighbors[5] = getCoords(cs, 220);
CoordinationGeometry geom = new CoordinationGeometry(center, neighbors);
double q3Expected = 0.9730115379131878;
assertEquals(q3Expected, geom.q3(), 0.0001);
double q4Expected = 0.9691494056145086;
assertEquals(q4Expected, geom.q4(), 0.0001);
double q5Expected = 0.5126001729084566;
assertEquals(q5Expected, geom.q5(), 0.0001);
double q6Expected = 0.2723305441457363;
assertEquals(q6Expected, geom.q6(), 0.0001);
}
private static Point3d getCoords(ColumnarStructure cs, int index) {
return new Point3d(cs.getxCoords()[index], cs.getyCoords()[index], cs.getzCoords()[index]);
}
}
19
Source : ColumnarStructureXTest.java
with Apache License 2.0
from sbl-sdsc
public class ColumnarStructureXTest {
private JavaSparkContext sc;
@Before
public void setUp() throws Exception {
SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(ColumnarStructureTest.class.getSimpleName());
sc = new JavaSparkContext(conf);
}
@After
public void tearDown() throws Exception {
sc.close();
}
@Test
public void test1() {
List<String> pdbIds = Arrays.asList("5NVB");
JavaPairRDD<String, StructureDataInterface> pdb = MmtfReader.downloadFullMmtfFiles(pdbIds, sc);
StructureDataInterface s = pdb.values().first();
ColumnarStructureX cs = new ColumnarStructureX(s, true);
assertEquals(cs.getNormalizedbFactors()[0], Float.MAX_VALUE, 0.000001);
}
@Test
public void test2() {
List<String> pdbIds = Arrays.asList("4QXX");
JavaPairRDD<String, StructureDataInterface> pdb = MmtfReader.downloadFullMmtfFiles(pdbIds, sc);
StructureDataInterface s = pdb.values().first();
ColumnarStructureX cs = new ColumnarStructureX(s, true);
assertTrue(cs.isGroupWithAlternateLocations()[6]);
}
}
19
Source : ColumnarStructureTest.java
with Apache License 2.0
from sbl-sdsc
public class ColumnarStructureTest {
private JavaSparkContext sc;
private JavaPairRDD<String, StructureDataInterface> pdb;
@Before
public void setUp() throws Exception {
SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(ColumnarStructureTest.class.getSimpleName());
sc = new JavaSparkContext(conf);
List<String> pdbIds = Arrays.asList("1STP");
pdb = MmtfReader.downloadFullMmtfFiles(pdbIds, sc);
}
@After
public void tearDown() throws Exception {
sc.close();
}
@Test
public void testGetxCoords() {
StructureDataInterface s = pdb.values().first();
ColumnarStructure cs = new ColumnarStructure(s, true);
assertEquals(26.260, cs.getxCoords()[20], 0.001);
}
@Test
public void testGetElements() {
StructureDataInterface s = pdb.values().first();
ColumnarStructure cs = new ColumnarStructure(s, true);
replacedertEquals("C", cs.getElements()[20]);
}
@Test
public void testGetAtomNames() {
StructureDataInterface s = pdb.values().first();
ColumnarStructure cs = new ColumnarStructure(s, true);
replacedertEquals("CG2", cs.getAtomNames()[900]);
}
@Test
public void testGetGroupNames() {
StructureDataInterface s = pdb.values().first();
ColumnarStructure cs = new ColumnarStructure(s, true);
replacedertEquals("VAL", cs.getGroupNames()[900]);
}
@Test
public void testIsPolymer() {
StructureDataInterface s = pdb.values().first();
ColumnarStructure cs = new ColumnarStructure(s, true);
// chain A
assertEquals(true, cs.isPolymer()[100]);
// BTN
assertEquals(false, cs.isPolymer()[901]);
// HOH
assertEquals(false, cs.isPolymer()[917]);
}
@Test
public void testGetGroupNumbers() {
StructureDataInterface s = pdb.values().first();
ColumnarStructure cs = new ColumnarStructure(s, true);
replacedertEquals("130", cs.getGroupNumbers()[877]);
}
@Test
public void testGetChainIds() {
StructureDataInterface s = pdb.values().first();
ColumnarStructure cs = new ColumnarStructure(s, true);
replacedertEquals("A", cs.getChainIds()[100]);
// BTN
replacedertEquals("B", cs.getChainIds()[901]);
// HOH
replacedertEquals("C", cs.getChainIds()[917]);
}
@Test
public void testGetChemCompTypes() {
StructureDataInterface s = pdb.values().first();
ColumnarStructure cs = new ColumnarStructure(s, true);
replacedertEquals("PEPTIDE LINKING", cs.getChemCompTypes()[100]);
// BTN
replacedertEquals("NON-POLYMER", cs.getChemCompTypes()[901]);
// HOH
replacedertEquals("NON-POLYMER", cs.getChemCompTypes()[917]);
}
@Test
public void testGetEntityTypes() {
StructureDataInterface s = pdb.values().first();
ColumnarStructure cs = new ColumnarStructure(s, true);
assertEquals("PRO", cs.getEntityTypes()[100]);
// BTN
assertEquals("LGO", cs.getEntityTypes()[901]);
// HOH
assertEquals("WAT", cs.getEntityTypes()[917]);
}
@Test
public void testGetChainEntityTypes() {
StructureDataInterface s = pdb.values().first();
ColumnarStructure cs = new ColumnarStructure(s, true);
String[] entityTypes = cs.getChainEntityTypes();
assertEquals("PRO", entityTypes[0]);
// BTN
assertEquals("LGO", entityTypes[1]);
// HOH
assertEquals("WAT", entityTypes[2]);
}
@Test
public void testGroupToAtomIndices() {
StructureDataInterface s = pdb.values().first();
ColumnarStructure cs = new ColumnarStructure(s, true);
int[] groupToAtomIndices = cs.getGroupToAtomIndices();
// ALA-13
assertEquals(0, groupToAtomIndices[0]);
// GLU-14
assertEquals(5, groupToAtomIndices[1]);
assertEquals(14, groupToAtomIndices[2]);
// last HOH
assertEquals(1000, groupToAtomIndices[205]);
// end
assertEquals(1001, groupToAtomIndices[206]);
}
@Test
public void testChainToAtomIndices() {
StructureDataInterface s = pdb.values().first();
ColumnarStructure cs = new ColumnarStructure(s, true);
int[] chainToAtomIndices = cs.getChainToAtomIndices();
// chain A
assertEquals(0, chainToAtomIndices[0]);
// BTN
assertEquals(901, chainToAtomIndices[1]);
// HOH
assertEquals(917, chainToAtomIndices[2]);
// end
assertEquals(1001, chainToAtomIndices[3]);
}
@Test
public void testChainToGroupIndices() {
StructureDataInterface s = pdb.values().first();
ColumnarStructure cs = new ColumnarStructure(s, true);
int[] chainToGroupIndices = cs.getChainToGroupIndices();
// chain A
assertEquals(0, chainToGroupIndices[0]);
// BTN
assertEquals(121, chainToGroupIndices[1]);
// HOH
assertEquals(122, chainToGroupIndices[2]);
// end
assertEquals(206, chainToGroupIndices[3]);
}
}
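A hypothetical standalone sketch (not from the project) that prints the first few atoms of 1STP using the same accessors exercised above:
SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("ColumnarStructureExample");
JavaSparkContext sc = new JavaSparkContext(conf);
JavaPairRDD<String, StructureDataInterface> pdb = MmtfReader.downloadFullMmtfFiles(Arrays.asList("1STP"), sc);
ColumnarStructure cs = new ColumnarStructure(pdb.values().first(), true);
for (int i = 0; i < 5; i++) {
// chain id, residue name and number, atom name, x coordinate
System.out.println(cs.getChainIds()[i] + " " + cs.getGroupNames()[i] + " " + cs.getGroupNumbers()[i] + " " + cs.getAtomNames()[i] + " " + cs.getxCoords()[i]);
}
sc.close();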