org.apache.spark.api.java.JavaSparkContext

Here are examples of the Java API org.apache.spark.api.java.JavaSparkContext, taken from open-source projects. By voting up, you can indicate which examples are most useful and appropriate.

830 Examples
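Before the voted examples, here is a minimal sketch (not from any of the listed projects; the app name and master values are illustrative placeholders) of the two ways a JavaSparkContext is obtained in the snippets below: constructing it directly from a SparkConf, or wrapping the SparkContext of an existing SparkSession via JavaSparkContext.fromSparkContext.

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;

public class JavaSparkContextSketch {

    public static void main(String[] args) {
        // Option 1: build the context directly from a SparkConf
        SparkConf conf = new SparkConf().setAppName("example").setMaster("local[*]");
        JavaSparkContext jsc = new JavaSparkContext(conf);
        jsc.stop();
        // Option 2: wrap the SparkContext of an existing SparkSession
        SparkSession spark = SparkSession.builder().appName("example").master("local[*]").getOrCreate();
        JavaSparkContext jsc2 = JavaSparkContext.fromSparkContext(spark.sparkContext());
        spark.stop();
    }
}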

19 Source : PageOneStepConvertRateSpark.java
with GNU General Public License v3.0
from wlhbdp

/**
 * Page-split generation and matching algorithm
 * @param sc
 * @param sessionid2actionsRDD
 * @param taskParam
 * @return
 */
private static JavaPairRDD<String, Integer> generateAndMatchPageSplit(JavaSparkContext sc, JavaPairRDD<String, Iterable<Row>> sessionid2actionsRDD, JSONObject taskParam) {
    String targetPageFlow = ParamUtils.getParam(taskParam, Constants.PARAM_TARGET_PAGE_FLOW);
    final Broadcast<String> targetPageFlowBroadcast = sc.broadcast(targetPageFlow);
    return sessionid2actionsRDD.flatMapToPair(new PairFlatMapFunction<Tuple2<String, Iterable<Row>>, String, Integer>() {

        private static final long serialVersionUID = 1L;

        @Override
        public Iterator<Tuple2<String, Integer>> call(Tuple2<String, Iterable<Row>> tuple) throws Exception {
            // the result list of (pageSplit, 1) tuples
            List<Tuple2<String, Integer>> list = new ArrayList<Tuple2<String, Integer>>();
            // get an iterator over the current session's action rows
            Iterator<Row> iterator = tuple._2.iterator();
            // get the user-specified page flow, e.g. 1,2,3,4,5,6,7
            // what is the conversion rate of 1->2? of 2->3?
            String[] targetPages = targetPageFlowBroadcast.value().split(",");
            // the session's actions we get here are unordered by default,
            // but they must be sorted by time before page splits can be generated
            // e.g. 3->5->4->10->7 has to become 3->4->5->7->10
            // sort the rows by action time
            List<Row> rows = new ArrayList<Row>();
            while (iterator.hasNext()) {
                rows.add(iterator.next());
            }
            Collections.sort(rows, new Comparator<Row>() {

                @Override
                public int compare(Row o1, Row o2) {
                    String actionTime1 = o1.getString(4);
                    String actionTime2 = o2.getString(4);
                    Date date1 = DateUtils.parseTime(actionTime1);
                    Date date2 = DateUtils.parseTime(actionTime2);
                    return Long.compare(date1.getTime(), date2.getTime());
                }
            });
            // generate page splits and match them against the target page flow
            Long lastPageId = null;
            for (Row row : rows) {
                Long pageid = row.getLong(3);
                if (lastPageId == null) {
                    lastPageId = pageid;
                    continue;
                }
                // generate a page split, e.g. for the page sequence 3,5,2,1,8,9:
                // lastPageId=3, current page 5 -> split "3_5"
                String pageSplit = lastPageId + "_" + pageid;
                // check whether this split occurs in the user-specified page flow
                for (int i = 1; i < targetPages.length; i++) {
                    // e.g. if the specified page flow is 3,2,5,8,1,
                    // iterate from index 1 (the second page), so the first target split is 3_2
                    String targetPageSplit = targetPages[i - 1] + "_" + targetPages[i];
                    if (pageSplit.equals(targetPageSplit)) {
                        list.add(new Tuple2<String, Integer>(pageSplit, 1));
                        break;
                    }
                }
                lastPageId = pageid;
            }
            return list.iterator();
        }
    });
}
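The comments above ask what the conversion rate of each target step is; a hedged sketch of how the (pageSplit, 1) pairs returned by generateAndMatchPageSplit could be aggregated into such a rate is shown below (the class, method, and variable names are illustrative assumptions, not part of the original project):

import java.util.Map;
import org.apache.spark.api.java.JavaPairRDD;

public class PageSplitRateSketch {

    /**
     * Illustrative only: count how often each page split occurs and derive one conversion rate,
     * e.g. conversionRate(rdd, "3_5", "5_7") = count("5_7") / count("3_5").
     */
    public static double conversionRate(JavaPairRDD<String, Integer> pageSplitRDD, String fromSplit, String toSplit) {
        // countByKey() collects the per-split occurrence counts to the driver
        Map<String, Long> splitCounts = pageSplitRDD.countByKey();
        long from = splitCounts.getOrDefault(fromSplit, 0L);
        long to = splitCounts.getOrDefault(toSplit, 0L);
        // guard against division by zero when the source split never occurs
        return from == 0 ? 0.0 : (double) to / from;
    }
}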

19 Source : BlocksFromEntityIndexTest.java
with Apache License 2.0
from vefthym

/**
 * @author vefthym
 */
public class BlocksFromEntityIndexTest {

    SparkSession spark;

    JavaSparkContext jsc;

    public BlocksFromEntityIndexTest() {
    }

    @BeforeClass
    public static void setUpClass() {
    }

    @AfterClass
    public static void tearDownClass() {
    }

    @Before
    public void setUp() {
        // only for local mode
        System.setProperty("hadoop.home.dir", "C:\\Users\\VASILIS\\Doreplacedents\\hadoop_home");
        spark = SparkSession.builder().appName("test").config("spark.sql.warehouse.dir", "/file:/tmp").config("spark.executor.instances", 1).config("spark.executor.cores", 1).config("spark.executor.memory", "1G").config("spark.driver.maxResultSize", "1g").config("spark.master", "local").getOrCreate();
        jsc = JavaSparkContext.fromSparkContext(spark.sparkContext());
    }

    @After
    public void tearDown() {
    }

    /**
     * Test of run method, of class BlocksFromEntityIndex.
     */
    @Test
    public void testRun() {
        System.out.println("blocks from enreplacedy index");
        List<String> dummyBlocks = new ArrayList<>();
        dummyBlocks.add("0\t1#2#3#4#5#;-1#-2#-3#-4#-5#");
        dummyBlocks.add("1\t3#4#5#;-1#-5#");
        dummyBlocks.add("2\t5#;-5#");
        dummyBlocks.add("3\t5#;");
        JavaRDD<String> blockingInput = jsc.parallelize(dummyBlocks);
        LongAccumulator BLOCK_ASSIGNMENTS = jsc.sc().longAccumulator();
        BlockFilteringAdvanced blockFiltering = new BlockFilteringAdvanced();
        JavaPairRDD<Integer, IntArrayList> entityIndex = blockFiltering.run(blockingInput, BLOCK_ASSIGNMENTS);
        BlocksFromEntityIndex instance = new BlocksFromEntityIndex();
        LongAccumulator cleanBlocksAccum = jsc.sc().longAccumulator();
        LongAccumulator numComparisons = jsc.sc().longAccumulator();
        JavaPairRDD<Integer, IntArrayList> result = instance.run(entityIndex, cleanBlocksAccum, numComparisons);
        List<Tuple2<Integer, IntArrayList>> expResult = new ArrayList<>();
        expResult.add(new Tuple2<>(0, new IntArrayList(new int[] { 1, 2, 3, 4, -1, -2, -3, -4 })));
        expResult.add(new Tuple2<>(1, new IntArrayList(new int[] { 3, 4, 5, -1, -5 })));
        expResult.add(new Tuple2<>(2, new IntArrayList(new int[] { 5, -5 })));
        JavaPairRDD<Integer, IntArrayList> expResultRDD = jsc.parallelizePairs(expResult);
        List<Tuple2<Integer, IntArrayList>> resultList = result.collect();
        List<Tuple2<Integer, IntArrayList>> expResultList = expResultRDD.collect();
        expResultList.stream().forEach(listItem -> Collections.sort(listItem._2()));
        resultList.stream().forEach(listItem -> Collections.sort(listItem._2()));
        System.out.println("Result: " + Arrays.toString(resultList.toArray()));
        System.out.println("Expect: " + Arrays.toString(expResultList.toArray()));
        assertEquals((long) cleanBlocksAccum.value(), 3);
        assertEquals((long) numComparisons.value(), 23);
        assertEquals(new HashSet<>(resultList), new HashSet<>(expResultList));
    // assertEquals(expResultRDD, result);
    }
}

19 Source : BlockFilteringAdvancedTest.java
with Apache License 2.0
from vefthym

/**
 * @author vefthym
 */
public class BlockFilteringAdvancedTest {

    SparkSession spark;

    JavaSparkContext jsc;

    public BlockFilteringAdvancedTest() {
    }

    @BeforeClass
    public static void setUpClass() {
    }

    @AfterClass
    public static void tearDownClass() {
    }

    @Before
    public void setUp() {
        // only for local mode
        System.setProperty("hadoop.home.dir", "C:\\Users\\VASILIS\\Doreplacedents\\hadoop_home");
        spark = SparkSession.builder().appName("test").config("spark.sql.warehouse.dir", "/file:/tmp").config("spark.executor.instances", 1).config("spark.executor.cores", 1).config("spark.executor.memory", "1G").config("spark.driver.maxResultSize", "1g").config("spark.master", "local").getOrCreate();
        jsc = JavaSparkContext.fromSparkContext(spark.sparkContext());
    }

    @After
    public void tearDown() {
    }

    /**
     * Test of run method, of class BlockFilteringAdvanced.
     */
    @Test
    public void testRun() {
        System.out.println("getEnreplacedyBlocksAdvanced");
        List<String> dummyBlocks = new ArrayList<>();
        dummyBlocks.add("0\t1#2#3#4#5#;-1#-2#-3#-4#-5#");
        dummyBlocks.add("1\t3#4#5#;-1#-5#");
        dummyBlocks.add("2\t5#;-5#");
        dummyBlocks.add("3\t5#;");
        JavaRDD<String> blockingInput = jsc.parallelize(dummyBlocks);
        LongAccumulator BLOCK_ASSIGNMENTS = jsc.sc().longAccumulator();
        BlockFilteringAdvanced instance = new BlockFilteringAdvanced();
        JavaPairRDD<Integer, IntArrayList> result = instance.run(blockingInput, BLOCK_ASSIGNMENTS);
        List<Tuple2<Integer, IntArrayList>> expResult = new ArrayList<>();
        expResult.add(new Tuple2<>(1, new IntArrayList(new int[] { 0 })));
        expResult.add(new Tuple2<>(2, new IntArrayList(new int[] { 0 })));
        expResult.add(new Tuple2<>(3, new IntArrayList(new int[] { 1, 0 })));
        expResult.add(new Tuple2<>(4, new IntArrayList(new int[] { 1, 0 })));
        expResult.add(new Tuple2<>(5, new IntArrayList(new int[] { 2, 1 })));
        expResult.add(new Tuple2<>(-1, new IntArrayList(new int[] { 1, 0 })));
        expResult.add(new Tuple2<>(-2, new IntArrayList(new int[] { 0 })));
        expResult.add(new Tuple2<>(-3, new IntArrayList(new int[] { 0 })));
        expResult.add(new Tuple2<>(-4, new IntArrayList(new int[] { 0 })));
        expResult.add(new Tuple2<>(-5, new IntArrayList(new int[] { 2, 1 })));
        JavaPairRDD<Integer, IntArrayList> expResultRDD = jsc.parallelizePairs(expResult);
        List<Tuple2<Integer, IntArrayList>> resultList = result.collect();
        List<Tuple2<Integer, IntArrayList>> expResultList = expResultRDD.collect();
        System.out.println("Result: " + Arrays.toString(resultList.toArray()));
        System.out.println("Expect: " + Arrays.toString(expResultList.toArray()));
        assertEquals(new HashSet<>(resultList), new HashSet<>(expResultList));
        assertEquals((long) BLOCK_ASSIGNMENTS.value(), 15);
    }

    /**
     * Test of parseBlockCollection method, of class BlockFilteringAdvanced.
     */
    @Test
    public void testParseBlockCollection() {
        System.out.println("parseBlockCollection");
        List<String> dummyBlocks = new ArrayList<>();
        dummyBlocks.add("0\t1#2#3#4#5#;-1#-2#-3#-4#-5#");
        dummyBlocks.add("1\t3#4#5#;-1#-5#");
        dummyBlocks.add("2\t5#;-5#");
        dummyBlocks.add("3\t5#;");
        JavaRDD<String> blockingInput = jsc.parallelize(dummyBlocks);
        BlockFilteringAdvanced instance = new BlockFilteringAdvanced();
        JavaPairRDD<Integer, IntArrayList> result = instance.parseBlockCollection(blockingInput);
        List<Tuple2<Integer, IntArrayList>> dummyBlocksParsed = new ArrayList<>();
        dummyBlocksParsed.add(new Tuple2<>(0, new IntArrayList(new int[] { 1, 2, 3, 4, 5, -1, -2, -3, -4, -5 })));
        dummyBlocksParsed.add(new Tuple2<>(1, new IntArrayList(new int[] { 3, 4, 5, -1, -5 })));
        dummyBlocksParsed.add(new Tuple2<>(2, new IntArrayList(new int[] { 5, -5 })));
        dummyBlocksParsed.add(new Tuple2<>(3, new IntArrayList(new int[] { 5 })));
        JavaPairRDD<Integer, IntArrayList> expResult = jsc.parallelizePairs(dummyBlocksParsed);
        List<Tuple2<Integer, IntArrayList>> resultList = result.collect();
        List<Tuple2<Integer, IntArrayList>> expResultList = expResult.collect();
        System.out.println("Result: " + Arrays.toString(resultList.toArray()));
        System.out.println("Expect: " + Arrays.toString(expResultList.toArray()));
        assertEquals(resultList, expResultList);
    }

    /**
     * Test of getEntityBlocksAdvanced method, of class BlockFilteringAdvanced.
     * @throws java.lang.IllegalAccessException
     * @throws java.lang.reflect.InvocationTargetException
     * @throws java.lang.NoSuchMethodException
     */
    @Test
    public void testGetEntityBlocksAdvanced() throws IllegalAccessException, IllegalArgumentException, InvocationTargetException, NoSuchMethodException {
        System.out.println("getEntityBlocksAdvanced");
        List<String> dummyBlocks = new ArrayList<>();
        dummyBlocks.add("0\t1#2#3#4#5#;-1#-2#-3#-4#-5#");
        dummyBlocks.add("1\t3#4#5#;-1#-5#");
        dummyBlocks.add("2\t5#;-5#");
        dummyBlocks.add("3\t5#;");
        JavaRDD<String> blockingInput = jsc.parallelize(dummyBlocks);
        BlockFilteringAdvanced instance = new BlockFilteringAdvanced();
        JavaPairRDD<Integer, IntArrayList> parsedBlocks = instance.parseBlockCollection(blockingInput);
        Method method = BlockFilteringAdvanced.class.getDeclaredMethod("getEntityBlocksAdvanced", JavaPairRDD.class);
        method.setAccessible(true);
        JavaPairRDD<Integer, Tuple2<Integer, Integer>> result = (JavaPairRDD<Integer, Tuple2<Integer, Integer>>) method.invoke(instance, parsedBlocks);
        List<Tuple2<Integer, Tuple2<Integer, Integer>>> expResult = new ArrayList<>();
        expResult.add(new Tuple2<>(1, new Tuple2<>(0, 5)));
        expResult.add(new Tuple2<>(2, new Tuple2<>(0, 5)));
        expResult.add(new Tuple2<>(3, new Tuple2<>(0, 5)));
        expResult.add(new Tuple2<>(4, new Tuple2<>(0, 5)));
        expResult.add(new Tuple2<>(5, new Tuple2<>(0, 5)));
        expResult.add(new Tuple2<>(-1, new Tuple2<>(0, 5)));
        expResult.add(new Tuple2<>(-2, new Tuple2<>(0, 5)));
        expResult.add(new Tuple2<>(-3, new Tuple2<>(0, 5)));
        expResult.add(new Tuple2<>(-4, new Tuple2<>(0, 5)));
        expResult.add(new Tuple2<>(-5, new Tuple2<>(0, 5)));
        expResult.add(new Tuple2<>(3, new Tuple2<>(1, 3)));
        expResult.add(new Tuple2<>(4, new Tuple2<>(1, 3)));
        expResult.add(new Tuple2<>(5, new Tuple2<>(1, 3)));
        expResult.add(new Tuple2<>(-1, new Tuple2<>(1, 3)));
        expResult.add(new Tuple2<>(-5, new Tuple2<>(1, 3)));
        expResult.add(new Tuple2<>(5, new Tuple2<>(2, 1)));
        expResult.add(new Tuple2<>(-5, new Tuple2<>(2, 1)));
        // expResult.add(new Tuple2<>(5, new Tuple2<>(3,0))); //null result
        JavaPairRDD<Integer, Tuple2<Integer, Integer>> expResultRDD = jsc.parallelizePairs(expResult);
        List<Tuple2<Integer, Tuple2<Integer, Integer>>> resultList = result.collect();
        List<Tuple2<Integer, Tuple2<Integer, Integer>>> expResultList = expResultRDD.collect();
        System.out.println("Result: " + Arrays.toString(resultList.toArray()));
        System.out.println("Expect: " + Arrays.toString(expResultList.toArray()));
        assertEquals(new HashSet<>(resultList), new HashSet<>(expResultList));
    }

    /**
     * Test of getEntityIndex method, of class BlockFilteringAdvanced.
     */
    @Test
    public void testGetEntityIndex() throws NoSuchMethodException, IllegalAccessException, IllegalArgumentException, InvocationTargetException {
        System.out.println("getEntityIndex");
        List<String> dummyBlocks = new ArrayList<>();
        dummyBlocks.add("0\t1#2#3#4#5#;-1#-2#-3#-4#-5#");
        dummyBlocks.add("1\t3#4#5#;-1#-5#");
        dummyBlocks.add("2\t5#;-5#");
        dummyBlocks.add("3\t5#;");
        JavaRDD<String> blockingInput = jsc.parallelize(dummyBlocks);
        BlockFilteringAdvanced instance = new BlockFilteringAdvanced();
        JavaPairRDD<Integer, IntArrayList> parsedBlocks = instance.parseBlockCollection(blockingInput);
        Method method1 = BlockFilteringAdvanced.class.getDeclaredMethod("getEntityBlocksAdvanced", JavaPairRDD.class);
        method1.setAccessible(true);
        JavaPairRDD<Integer, Tuple2<Integer, Integer>> entityBlocks = (JavaPairRDD<Integer, Tuple2<Integer, Integer>>) method1.invoke(instance, parsedBlocks);
        Method method2 = BlockFilteringAdvanced.class.getDeclaredMethod("getEntityIndex", JavaPairRDD.class, LongAccumulator.class);
        method2.setAccessible(true);
        LongAccumulator BLOCK_ASSIGNMENTS = jsc.sc().longAccumulator();
        JavaPairRDD<Integer, IntArrayList> result = (JavaPairRDD<Integer, IntArrayList>) method2.invoke(instance, entityBlocks, BLOCK_ASSIGNMENTS);
        // final int MAX_BLOCKS = ((Double)Math.floor(3*numBlocks/4+1)).intValue(); //|_ 3|Bi|/4+1 _| //preprocessing
        List<Tuple2<Integer, IntArrayList>> expResult = new ArrayList<>();
        expResult.add(new Tuple2<>(-2, new IntArrayList(new int[] { 0 })));
        expResult.add(new Tuple2<>(4, new IntArrayList(new int[] { 1, 0 })));
        expResult.add(new Tuple2<>(-1, new IntArrayList(new int[] { 1, 0 })));
        expResult.add(new Tuple2<>(-5, new IntArrayList(new int[] { 2, 1 })));
        expResult.add(new Tuple2<>(-4, new IntArrayList(new int[] { 0 })));
        expResult.add(new Tuple2<>(1, new IntArrayList(new int[] { 0 })));
        expResult.add(new Tuple2<>(-3, new IntArrayList(new int[] { 0 })));
        expResult.add(new Tuple2<>(3, new IntArrayList(new int[] { 1, 0 })));
        expResult.add(new Tuple2<>(5, new IntArrayList(new int[] { 2, 1 })));
        expResult.add(new Tuple2<>(2, new IntArrayList(new int[] { 0 })));
        JavaPairRDD<Integer, IntArrayList> expResultRDD = jsc.parallelizePairs(expResult);
        List<Tuple2<Integer, IntArrayList>> resultList = result.collect();
        List<Tuple2<Integer, IntArrayList>> expResultList = expResultRDD.collect();
        System.out.println("Result: " + Arrays.toString(resultList.toArray()));
        System.out.println("Expect: " + Arrays.toString(expResultList.toArray()));
        assertEquals(new HashSet<>(resultList), new HashSet<>(expResultList));
        assertEquals((long) BLOCK_ASSIGNMENTS.value(), 15);
    }
}

19 Source : EntityBasedCNPMapPhaseTest.java
with Apache License 2.0
from vefthym

/**
 * @author vefthym
 */
public class EntityBasedCNPMapPhaseTest {

    SparkSession spark;

    JavaSparkContext jsc;

    public EntityBasedCNPMapPhaseTest() {
    }

    @BeforeClass
    public static void setUpClass() {
    }

    @AfterClass
    public static void tearDownClass() {
    }

    @Before
    public void setUp() {
        // only for local mode
        System.setProperty("hadoop.home.dir", "C:\\Users\\VASILIS\\Doreplacedents\\hadoop_home");
        spark = SparkSession.builder().appName("test").config("spark.sql.warehouse.dir", "/file:/tmp").config("spark.executor.instances", 1).config("spark.executor.cores", 1).config("spark.executor.memory", "1G").config("spark.driver.maxResultSize", "1g").config("spark.master", "local").getOrCreate();
        jsc = JavaSparkContext.fromSparkContext(spark.sparkContext());
    }

    @After
    public void tearDown() {
    }

    /**
     * Test of getMapOutput method, of class EntityBasedCNPMapPhase.
     */
    /*
    @Test
    public void testGetMapOutput() {
        System.out.println("getMapOutput");
        JavaPairRDD<Integer, IntArrayList> blocksFromEI = null;
        JavaPairRDD<Integer, IntArrayList> expResult = null;
        JavaPairRDD<Integer, IntArrayList> result = CNPMapPhase.getMapOutput(blocksFromEI);
        replacedertEquals(expResult, result);
        // TODO review the generated test code and remove the default call to fail.
        fail("The test case is a prototype.");
    }*/
    /**
     * Test of getMapOutputWJS method, of class CNPMapPhase.
     */
    @Test
    public void testGetMapOutputWJS() {
        System.out.println("getMapOutputWJS");
        System.out.println("blocks from enreplacedy index");
        List<String> dummyBlocks = new ArrayList<>();
        dummyBlocks.add("0\t1#2#3#4#5#;-1#-2#-3#-4#-5#");
        dummyBlocks.add("1\t3#4#5#;-1#-5#");
        dummyBlocks.add("2\t5#;-5#");
        dummyBlocks.add("3\t5#;");
        JavaRDD<String> blockingInput = jsc.parallelize(dummyBlocks);
        LongAccumulator BLOCK_ASSIGNMENTS = jsc.sc().longAccumulator();
        BlockFilteringAdvanced blockFiltering = new BlockFilteringAdvanced();
        JavaPairRDD<Integer, IntArrayList> entityIndex = blockFiltering.run(blockingInput, BLOCK_ASSIGNMENTS);
        BlocksFromEntityIndex bfei = new BlocksFromEntityIndex();
        LongAccumulator cleanBlocksAccum = jsc.sc().longAccumulator();
        LongAccumulator numComparisons = jsc.sc().longAccumulator();
        JavaPairRDD<Integer, IntArrayList> filteredBlocks = bfei.run(entityIndex, cleanBlocksAccum, numComparisons);
        List<Tuple2<Integer, IntArrayList>> tweakedBlocks = new ArrayList<>(filteredBlocks.collect());
        // this should not alter the results
        tweakedBlocks.add(new Tuple2<>(-1, new IntArrayList(new int[] { -100 })));
        filteredBlocks = jsc.parallelizePairs(tweakedBlocks);
        JavaPairRDD<Integer, IntArrayList> result = CNPMapPhase.getMapOutputWJS(filteredBlocks);
        List<Tuple2<Integer, IntArrayList>> expResult = new ArrayList<>();
        expResult.add(new Tuple2<>(4, new IntArrayList(new int[] { 4, -2, -1, -4, -3 })));
        expResult.add(new Tuple2<>(1, new IntArrayList(new int[] { 4, -2, -1, -4, -3 })));
        expResult.add(new Tuple2<>(3, new IntArrayList(new int[] { 4, -2, -1, -4, -3 })));
        expResult.add(new Tuple2<>(2, new IntArrayList(new int[] { 4, -2, -1, -4, -3 })));
        expResult.add(new Tuple2<>(-2, new IntArrayList(new int[] { 4, 4, 1, 3, 2 })));
        expResult.add(new Tuple2<>(-1, new IntArrayList(new int[] { 4, 4, 1, 3, 2 })));
        expResult.add(new Tuple2<>(-4, new IntArrayList(new int[] { 4, 4, 1, 3, 2 })));
        expResult.add(new Tuple2<>(-3, new IntArrayList(new int[] { 4, 4, 1, 3, 2 })));
        expResult.add(new Tuple2<>(4, new IntArrayList(new int[] { 3, -1, -5 })));
        expResult.add(new Tuple2<>(3, new IntArrayList(new int[] { 3, -1, -5 })));
        expResult.add(new Tuple2<>(5, new IntArrayList(new int[] { 3, -1, -5 })));
        expResult.add(new Tuple2<>(-1, new IntArrayList(new int[] { 2, 4, 3, 5 })));
        expResult.add(new Tuple2<>(-5, new IntArrayList(new int[] { 2, 4, 3, 5 })));
        expResult.add(new Tuple2<>(5, new IntArrayList(new int[] { 1, -5 })));
        expResult.add(new Tuple2<>(-5, new IntArrayList(new int[] { 1, 5 })));
        JavaPairRDD<Integer, IntArrayList> expResultRDD = jsc.parallelizePairs(expResult);
        List<Tuple2<Integer, IntArrayList>> resultList = result.collect();
        List<Tuple2<Integer, IntArrayList>> expResultList = expResultRDD.collect();
        // expResultList.stream().forEach(listItem -> Collections.sort(listItem._2()));
        // resultList.stream().forEach(listItem -> Collections.sort(listItem._2()));
        System.out.println("Result: " + Arrays.toString(resultList.toArray()));
        System.out.println("Expect: " + Arrays.toString(expResultList.toArray()));
        assertEquals(new HashSet<>(resultList), new HashSet<>(expResultList));
    }
}

19 Source : EvaluateLabelMatchingResults.java
with Apache License 2.0
from vefthym

public static void main(String[] args) {
    String tmpPath;
    String master;
    String inputTriples1, inputTriples2, entityIds1, entityIds2;
    String resultsPath, groundTruthPath;
    if (args.length == 0) {
        // only for local mode
        System.setProperty("hadoop.home.dir", "C:\\Users\\VASILIS\\Doreplacedents\\hadoop_home");
        tmpPath = "/file:C:\\tmp";
        master = "local[2]";
        inputTriples1 = "";
        inputTriples2 = "";
        enreplacedyIds1 = "";
        enreplacedyIds2 = "";
        resultsPath = "/file:C:\\Users\\VASILIS\\Doreplacedents\\OAEI_Datasets\\exportedBlocks\\testInput";
        groundTruthPath = "";
    } else if (args.length == 5) {
        tmpPath = "/file:/tmp";
        // master = "spark://master:7077";
        inputTriples1 = args[0];
        inputTriples2 = args[1];
        entityIds1 = args[2];
        entityIds2 = args[3];
        groundTruthPath = args[4];
    } else {
        System.out.println("You can run match evaluation with the following arguments:" + "0: inputTriples1" + "1: inputTriples2" + "2: enreplacedyIds1" + "3: enreplacedyIds2" + "4: ground truth path");
        return;
    }
    String appName = "Evaluation of label matching";
    SparkSession spark = Utils.setUpSpark(appName, 288, 8, 3, tmpPath);
    int PARALLELISM = spark.sparkContext().getConf().getInt("spark.default.parallelism", 144);
    JavaSparkContext jsc = JavaSparkContext.fromSparkContext(spark.sparkContext());
    // //////////////////////
    // start the processing//
    // //////////////////////
    System.out.println("Starting the evaluation...");
    // YAGO-IMDb
    Set<String> labelAtts1 = new HashSet<>(Arrays.asList("rdfs:label", "label", "skos:prefLabel"));
    Set<String> labelAtts2 = labelAtts1;
    String GT_SEPARATOR = ",";
    if (groundTruthPath.contains("music")) {
        GT_SEPARATOR = " ";
        // BBCmusic
        labelAtts1 = new HashSet<>(Arrays.asList("<http://purl.org/dc/elements/1.1/replacedle>", "<http://open.vocab.org/terms/sortLabel>", "<http://xmlns.com/foaf/0.1/name>"));
        labelAtts2 = new HashSet<>(Arrays.asList("<http://www.w3.org/2000/01/rdf-schema#label>", "<http://dbpedia.org/property/name>", "<http://xmlns.com/foaf/0.1/name>"));
    }
    if (inputTriples1.contains("rexa")) {
        labelAtts1 = new HashSet<>(Arrays.asList("http://xmlns.com/foaf/0.1/name", "http://www.w3.org/2000/01/rdf-schema#label"));
        labelAtts2 = labelAtts1;
    }
    String SEPARATOR = (inputTriples1.endsWith(".tsv")) ? "\t" : " ";
    // load the results
    JavaPairRDD<Integer, Integer> matches = new LabelMatchingHeuristic().getMatchesFromLabels(jsc.textFile(inputTriples1, PARALLELISM), jsc.textFile(inputTriples2, PARALLELISM), jsc.textFile(entityIds1, PARALLELISM), jsc.textFile(entityIds2, PARALLELISM), SEPARATOR, labelAtts1, labelAtts2);
    // Start the evaluation
    LongAccumulator TPs = jsc.sc().longAccumulator("TPs");
    LongAccumulator FPs = jsc.sc().longAccumulator("FPs");
    LongAccumulator FNs = jsc.sc().longAccumulator("FNs");
    EvaluateLabelMatchingResults evaluation = new EvaluateLabelMatchingResults();
    JavaPairRDD<Integer, Integer> gt;
    if (groundTruthPath.contains("estaurant") || groundTruthPath.contains("Rexa_DBLP")) {
        GT_SEPARATOR = "\t";
        gt = Utils.readGroundTruthIds(jsc.textFile(groundTruthPath), GT_SEPARATOR).cache();
    } else {
        gt = Utils.getGroundTruthIdsFromEntityIds(jsc.textFile(entityIds1, PARALLELISM), jsc.textFile(entityIds2, PARALLELISM), jsc.textFile(groundTruthPath), GT_SEPARATOR).cache();
    }
    gt.cache();
    System.out.println("Finished loading the ground truth with " + gt.count() + " matches, now evaluating the results...");
    evaluation.evaluateResultsNEW(matches, gt, TPs, FPs, FNs);
    System.out.println("Evaluation finished successfully.");
    EvaluateLabelMatchingResults.printResults(TPs.value(), FPs.value(), FNs.value());
    spark.stop();
}

19 Source : RelationsRank.java
with Apache License 2.0
from vefthym

/**
 * return a map of topN neighbors per entity (reversed to point to in-neighbors (values) having the key entity as their top out-neighbor)
 * @param rawTriples
 * @param SEPARATOR
 * @param entityIdsRDD
 * @param MIN_SUPPORT_THRESHOLD
 * @param N topN neighbors per entity
 * @param positiveIds
 * @param jsc
 * @return
 */
public Map<Integer, IntArrayList> run(JavaRDD<String> rawTriples, String SEPARATOR, JavaRDD<String> entityIdsRDD, float MIN_SUPPORT_THRESHOLD, int N, boolean positiveIds, JavaSparkContext jsc) {
    // rawTriples.persist(StorageLevel.MEMORY_AND_DISK_SER());
    // List<String> subjects = Utils.getEntityUrlsFromEntityRDDInOrder(rawTriples, SEPARATOR); //a list of (distinct) subject URLs, keeping insertion order (from original triples file)
    // Object2IntOpenHashMap<String> subjects = Utils.getEntityIdsMapping(rawTriples, SEPARATOR);
    Object2IntOpenHashMap<String> entityIds = Utils.readEntityIdsMapping(entityIdsRDD, positiveIds);
    System.out.println("Found " + entityIds.size() + " entities in collection " + (positiveIds ? "1" : "2"));
    long numEntitiesSquared = (long) entityIds.keySet().size();
    numEntitiesSquared *= numEntitiesSquared;
    Broadcast<Object2IntOpenHashMap<String>> entityIds_BV = jsc.broadcast(entityIds);
    // a list of (s,o) for each predicate
    JavaPairRDD<String, List<Tuple2<Integer, Integer>>> relationIndex = getRelationIndex(rawTriples, SEPARATOR, entityIds_BV);
    // rawTriples.unpersist();
    relationIndex.persist(StorageLevel.MEMORY_AND_DISK_SER());
    List<String> relationsRank = getRelationsRank(relationIndex, MIN_SUPPORT_THRESHOLD, numEntitiesSquared);
    System.out.println("Top-5 relations in collection " + (positiveIds ? "1: " : "2: ") + Arrays.toString(relationsRank.subList(0, Math.min(5, relationsRank.size())).toArray()));
    // action
    JavaPairRDD<Integer, IntArrayList> topOutNeighbors = getTopOutNeighborsPerEntity(relationIndex, relationsRank, N, positiveIds);
    relationIndex.unpersist();
    // reverse the outNeighbors, to get in neighbors
    Map<Integer, IntArrayList> inNeighbors = topOutNeighbors.flatMapToPair(x -> {
        // reverse the neighbor pairs from (in,[out1,out2,out3]) to (out1,in), (out2,in), (out3,in)
        List<Tuple2<Integer, Integer>> inNeighbs = new ArrayList<>();
        for (int outNeighbor : x._2()) {
            inNeighbs.add(new Tuple2<>(outNeighbor, x._1()));
        }
        return inNeighbs.iterator();
    }).aggregateByKey(new IntOpenHashSet(), (x, y) -> {
        x.add(y);
        return x;
    }, (x, y) -> {
        x.addAll(y);
        return x;
    }).mapValues(x -> new IntArrayList(x)).collectAsMap();
    return inNeighbors;
}

19 Source : CNPNeighbors.java
with Apache License 2.0
from vefthym

/**
 * @param topKvalueCandidates the topK results per entity, acquired from value similarity
 * @param rawTriples1 the rdf triples of the first entity collection
 * @param rawTriples2 the rdf triples of the second entity collection
 * @param SEPARATOR the delimiter that separates subjects, predicates and objects in the rawTriples1 and rawTriples2 files
 * @param entityIds1 the mapping of entity urls to entity ids, as it was used in blocking
 * @param entityIds2
 * @param MIN_SUPPORT_THRESHOLD the minimum support threshold, below which, relations are discarded from top relations
 * @param K the K for topK candidate matches
 * @param N the N for topN rdf relations (and neighbors)
 * @param jsc the java spark context used to load files and broadcast variables
 * @return topK neighbor candidates per entity
 */
public JavaPairRDD<Integer, IntArrayList> run(JavaPairRDD<Integer, Int2FloatLinkedOpenHashMap> topKvalueCandidates, JavaRDD<String> rawTriples1, JavaRDD<String> rawTriples2, String SEPARATOR, JavaRDD<String> entityIds1, JavaRDD<String> entityIds2, float MIN_SUPPORT_THRESHOLD, int K, int N, JavaSparkContext jsc) {
    Map<Integer, IntArrayList> inNeighbors = new HashMap<>(new RelationsRank().run(rawTriples1, SEPARATOR, entityIds1, MIN_SUPPORT_THRESHOLD, N, true, jsc));
    inNeighbors.putAll(new RelationsRank().run(rawTriples2, SEPARATOR, entityIds2, MIN_SUPPORT_THRESHOLD, N, false, jsc));
    Broadcast<Map<Integer, IntArrayList>> inNeighbors_BV = jsc.broadcast(inNeighbors);
    // JavaPairRDD<Tuple2<Integer, Integer>, Float> neighborSims = getNeighborSims(topKvalueCandidates, inNeighbors_BV);
    // JavaPairRDD<Integer, IntArrayList> topKneighborCandidates =  getTopKNeighborSimsOld(neighborSims, K);
    JavaPairRDD<Integer, IntArrayList> topKneighborCandidates = getTopKNeighborSims(topKvalueCandidates, inNeighbors_BV, K);
    return topKneighborCandidates;
}

19 Source : CNPARCS.java
with Apache License 2.0
from vefthym

/**
 * @param topKvalueCandidates the topK results per entity, acquired from value similarity
 * @param rawTriples1 the rdf triples of the first entity collection
 * @param rawTriples2 the rdf triples of the second entity collection
 * @param SEPARATOR the delimiter that separates subjects, predicates and objects in the rawTriples1 and rawTriples2 files
 * @param entityIds1 the mapping of entity urls to entity ids, as it was used in blocking
 * @param entityIds2
 * @param MIN_SUPPORT_THRESHOLD the minimum support threshold, below which, relations are discarded from top relations
 * @param K the K for topK candidate matches
 * @param N the N for topN rdf relations (and neighbors)
 * @param jsc the java spark context used to load files and broadcast variables
 * @return topK neighbor candidates per entity
 */
public JavaPairRDD<Integer, Int2FloatLinkedOpenHashMap> run2(JavaPairRDD<Integer, Int2FloatLinkedOpenHashMap> topKvalueCandidates, JavaRDD<String> rawTriples1, JavaRDD<String> rawTriples2, String SEPARATOR, JavaRDD<String> entityIds1, JavaRDD<String> entityIds2, float MIN_SUPPORT_THRESHOLD, int K, int N, JavaSparkContext jsc) {
    Map<Integer, IntArrayList> inNeighbors = new HashMap<>(new RelationsRank().run(rawTriples1, SEPARATOR, entityIds1, MIN_SUPPORT_THRESHOLD, N, true, jsc));
    inNeighbors.putAll(new RelationsRank().run(rawTriples2, SEPARATOR, entityIds2, MIN_SUPPORT_THRESHOLD, N, false, jsc));
    Broadcast<Map<Integer, IntArrayList>> inNeighbors_BV = jsc.broadcast(inNeighbors);
    JavaPairRDD<Integer, Int2FloatLinkedOpenHashMap> topKneighborCandidates = getTopKNeighborSimsSUMWithScores(topKvalueCandidates, inNeighbors_BV, K);
    return topKneighborCandidates;
}

19 Source : CNPARCS.java
with Apache License 2.0
from vefthym

/**
 * @param topKvalueCandidates the topK results per entity, acquired from value similarity
 * @param rawTriples1 the rdf triples of the first entity collection
 * @param rawTriples2 the rdf triples of the second entity collection
 * @param SEPARATOR the delimiter that separates subjects, predicates and objects in the rawTriples1 and rawTriples2 files
 * @param entityIds1 the mapping of entity urls to entity ids, as it was used in blocking
 * @param entityIds2
 * @param MIN_SUPPORT_THRESHOLD the minimum support threshold, below which, relations are discarded from top relations
 * @param K the K for topK candidate matches
 * @param N the N for topN rdf relations (and neighbors)
 * @param jsc the java spark context used to load files and broadcast variables
 * @return topK neighbor candidates per entity
 */
public JavaPairRDD<Integer, IntArrayList> run(JavaPairRDD<Integer, Int2FloatLinkedOpenHashMap> topKvalueCandidates, JavaRDD<String> rawTriples1, JavaRDD<String> rawTriples2, String SEPARATOR, JavaRDD<String> entityIds1, JavaRDD<String> entityIds2, float MIN_SUPPORT_THRESHOLD, int K, int N, JavaSparkContext jsc) {
    Map<Integer, IntArrayList> inNeighbors = new HashMap<>(new RelationsRank().run(rawTriples1, SEPARATOR, entityIds1, MIN_SUPPORT_THRESHOLD, N, true, jsc));
    inNeighbors.putAll(new RelationsRank().run(rawTriples2, SEPARATOR, entityIds2, MIN_SUPPORT_THRESHOLD, N, false, jsc));
    Broadcast<Map<Integer, IntArrayList>> inNeighbors_BV = jsc.broadcast(inNeighbors);
    JavaPairRDD<Integer, IntArrayList> topKneighborCandidates = getTopKNeighborSimsSUM(topKvalueCandidates, inNeighbors_BV, K);
    return topKneighborCandidates;
}

19 Source : AgeClassifySparkTrainer.java
with Apache License 2.0
from USCDataScience

public static AgeClassifyModel createModel(String languageCode, String dataIn, String tokenizer, String featureGenerators, TrainingParameters trainParams) throws IOException {
    SparkConf conf = new SparkConf().setAppName("AgeClassifySparkTrainer");
    JavaSparkContext sc = new JavaSparkContext(conf);
    AgeClassifyContextGeneratorWrapper wrapper = new AgeClassifyContextGeneratorWrapper(tokenizer, featureGenerators);
    JavaRDD<String> data = sc.textFile(dataIn, 8).cache();
    JavaRDD<EventWrapper> samples = data.map(new CreateEvents(wrapper)).cache();
    /*
	JavaRDD<EventWrapper> samples = data.map( 
	     new Function<String, EventWrapper>() {
		 public EventWrapper call(String s) {
		     String[] parts = s.split(",");
		     
		     try {
			 if (parts[0] != "-1") {
			     Integer value = Integer.parseInt(parts[0]);
			     
			     String[] text = parts[2].split(" ");
			     return new EventWrapper(value, text);
			 } else {
			     String cat = parts[1];
			     
			     String[] text = parts[2].split(" ");
			     return new EventWrapper(cat, text);
			 }
		     } catch(Exception e) {
			 return null;
		     }
		 }
	     });
	*/
    JavaRDD<EventWrapper> validSamples = samples.filter(new Function<EventWrapper, Boolean>() {

        @Override
        public Boolean call(EventWrapper s) {
            return s != null;
        }
    }).cache();
    // ObjectStream<Event> eventStream = EventStreamUtil.createEventStream(samples);
    ObjectStream<Event> eventStream = EventStreamUtil.createEventStream(validSamples.collect());
    Map<String, String> entries = new HashMap<String, String>();
    EventTrainer trainer = AgeClassifyTrainerFactory.getEventTrainer(trainParams.getSettings(), entries);
    MaxentModel ageModel = trainer.train(eventStream);
    samples.unpersist();
    data.unpersist();
    sc.stop();
    Map<String, String> manifestInfoEntries = new HashMap<String, String>();
    AgeClassifyFactory factory = AgeClassifyFactory.create("AgeClassifyFactory", wrapper.getTokenizer(), wrapper.getFeatureGenerators());
    return new AgeClassifyModel(languageCode, ageModel, manifestInfoEntries, factory);
}

19 Source : DataFrameRowFrameConversionTest.java
with Apache License 2.0
from tugraz-isds

public class DataFrameRowFrameConversionTest extends AutomatedTestBase {

    private final static String TEST_DIR = "functions/mlcontext/";

    private final static String TEST_NAME = "DataFrameConversion";

    private final static String TEST_CLASS_DIR = TEST_DIR + DataFrameRowFrameConversionTest.class.getSimpleName() + "/";

    private final static int rows1 = 1045;

    private final static int cols1 = 545;

    private final static int cols2 = 864;

    private final static double sparsity1 = 0.9;

    private final static double sparsity2 = 0.1;

    private final static double eps = 0.0000000001;

    private static SparkSession spark;

    private static JavaSparkContext sc;

    @BeforeClass
    public static void setUpClass() {
        spark = SparkSession.builder().appName("DataFrameRowFrameConversionTest").master("local").config("spark.memory.offHeap.enabled", "false").config("spark.sql.codegen.wholeStage", "false").getOrCreate();
        sc = new JavaSparkContext(spark.sparkContext());
    }

    @Override
    public void setUp() {
        addTestConfiguration(TEST_NAME, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME, new String[] { "A", "B" }));
    }

    @Test
    public void testRowDoubleConversionSingleDense() {
        testDataFrameConversion(ValueType.FP64, true, true, false);
    }

    @Test
    public void testRowDoubleConversionSingleDenseUnknown() {
        testDataFrameConversion(ValueType.FP64, true, true, true);
    }

    @Test
    public void testRowDoubleConversionSingleSparse() {
        testDataFrameConversion(ValueType.FP64, true, false, false);
    }

    @Test
    public void testRowDoubleConversionSingleSparseUnknown() {
        testDataFrameConversion(ValueType.FP64, true, false, true);
    }

    @Test
    public void testRowDoubleConversionMultiDense() {
        testDataFrameConversion(ValueType.FP64, false, true, false);
    }

    @Test
    public void testRowDoubleConversionMultiDenseUnknown() {
        testDataFrameConversion(ValueType.FP64, false, true, true);
    }

    @Test
    public void testRowDoubleConversionMultiSparse() {
        testDataFrameConversion(ValueType.FP64, false, false, false);
    }

    @Test
    public void testRowDoubleConversionMultiSparseUnknown() {
        testDataFrameConversion(ValueType.FP64, false, false, true);
    }

    @Test
    public void testRowStringConversionSingleDense() {
        testDataFrameConversion(ValueType.STRING, true, true, false);
    }

    @Test
    public void testRowStringConversionSingleDenseUnknown() {
        testDataFrameConversion(ValueType.STRING, true, true, true);
    }

    @Test
    public void testRowStringConversionSingleSparse() {
        testDataFrameConversion(ValueType.STRING, true, false, false);
    }

    @Test
    public void testRowStringConversionSingleSparseUnknown() {
        testDataFrameConversion(ValueType.STRING, true, false, true);
    }

    @Test
    public void testRowStringConversionMultiDense() {
        testDataFrameConversion(ValueType.STRING, false, true, false);
    }

    @Test
    public void testRowStringConversionMultiDenseUnknown() {
        testDataFrameConversion(ValueType.STRING, false, true, true);
    }

    @Test
    public void testRowStringConversionMultiSparse() {
        testDataFrameConversion(ValueType.STRING, false, false, false);
    }

    @Test
    public void testRowStringConversionMultiSparseUnknown() {
        testDataFrameConversion(ValueType.STRING, false, false, true);
    }

    @Test
    public void testRowLongConversionSingleDense() {
        testDataFrameConversion(ValueType.INT64, true, true, false);
    }

    @Test
    public void testRowLongConversionSingleDenseUnknown() {
        testDataFrameConversion(ValueType.INT64, true, true, true);
    }

    @Test
    public void testRowLongConversionSingleSparse() {
        testDataFrameConversion(ValueType.INT64, true, false, false);
    }

    @Test
    public void testRowLongConversionSingleSparseUnknown() {
        testDataFrameConversion(ValueType.INT64, true, false, true);
    }

    @Test
    public void testRowLongConversionMultiDense() {
        testDataFrameConversion(ValueType.INT64, false, true, false);
    }

    @Test
    public void testRowLongConversionMultiDenseUnknown() {
        testDataFrameConversion(ValueType.INT64, false, true, true);
    }

    @Test
    public void testRowLongConversionMultiSparse() {
        testDataFrameConversion(ValueType.INT64, false, false, false);
    }

    @Test
    public void testRowLongConversionMultiSparseUnknown() {
        testDataFrameConversion(ValueType.INT64, false, false, true);
    }

    private void testDataFrameConversion(ValueType vt, boolean singleColBlock, boolean dense, boolean unknownDims) {
        boolean oldConfig = DMLScript.USE_LOCAL_SPARK_CONFIG;
        ExecMode oldPlatform = DMLScript.getGlobalExecMode();
        try {
            DMLScript.USE_LOCAL_SPARK_CONFIG = true;
            DMLScript.setGlobalExecMode(ExecMode.HYBRID);
            // generate input data and setup metadata
            int cols = singleColBlock ? cols1 : cols2;
            double sparsity = dense ? sparsity1 : sparsity2;
            double[][] A = getRandomMatrix(rows1, cols, -10, 10, sparsity, 2373);
            A = (vt == ValueType.INT64) ? TestUtils.round(A) : A;
            MatrixBlock mbA = DataConverter.convertToMatrixBlock(A);
            FrameBlock fbA = DataConverter.convertToFrameBlock(mbA, vt);
            int blksz = ConfigurationManager.getBlocksize();
            MatrixCharacteristics mc1 = new MatrixCharacteristics(rows1, cols, blksz, mbA.getNonZeros());
            MatrixCharacteristics mc2 = unknownDims ? new MatrixCharacteristics() : new MatrixCharacteristics(mc1);
            ValueType[] schema = UtilFunctions.nCopies(cols, vt);
            // get binary block input rdd
            JavaPairRDD<Long, FrameBlock> in = SparkExecutionContext.toFrameJavaPairRDD(sc, fbA);
            // frame - dataframe - frame conversion
            Dataset<Row> df = FrameRDDConverterUtils.binaryBlockToDataFrame(spark, in, mc1, schema);
            JavaPairRDD<Long, FrameBlock> out = FrameRDDConverterUtils.dataFrameToBinaryBlock(sc, df, mc2, true);
            // get output frame block
            FrameBlock fbB = SparkExecutionContext.toFrameBlock(out, schema, rows1, cols);
            // compare frame blocks
            MatrixBlock mbB = DataConverter.convertToMatrixBlock(fbB);
            double[][] B = DataConverter.convertToDoubleMatrix(mbB);
            TestUtils.compareMatrices(A, B, rows1, cols, eps);
        } catch (Exception ex) {
            throw new RuntimeException(ex);
        } finally {
            DMLScript.USE_LOCAL_SPARK_CONFIG = oldConfig;
            DMLScript.setGlobalExecMode(oldPlatform);
        }
    }

    @AfterClass
    public static void tearDownClass() {
        // stop underlying spark context to allow single jvm tests (otherwise the
        // next test that tries to create a SparkContext would fail)
        spark.stop();
        sc = null;
        spark = null;
    }
}

19 Source : DataFrameMatrixConversionTest.java
with Apache License 2.0
from tugraz-isds

public class DataFrameMatrixConversionTest extends AutomatedTestBase {

    private final static String TEST_DIR = "functions/mlcontext/";

    private final static String TEST_NAME = "DataFrameConversion";

    private final static String TEST_CLASS_DIR = TEST_DIR + DataFrameMatrixConversionTest.class.getSimpleName() + "/";

    private final static int rows1 = 2245;

    private final static int rows3 = 7;

    private final static int cols1 = 745;

    private final static int cols2 = 1264;

    private final static int cols3 = 10038;

    private final static double sparsity1 = 0.9;

    private final static double sparsity2 = 0.1;

    private final static double eps = 0.0000000001;

    private static SparkSession spark;

    private static JavaSparkContext sc;

    @BeforeClass
    public static void setUpClass() {
        spark = createSystemDSSparkSession("DataFrameMatrixConversionTest", "local");
        sc = new JavaSparkContext(spark.sparkContext());
        LazyWriteBuffer.init();
    }

    @Override
    public void setUp() {
        addTestConfiguration(TEST_NAME, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME, new String[] { "A", "B" }));
    }

    @Test
    public void testVectorConversionSingleDense() {
        testDataFrameConversion(true, cols1, true, false);
    }

    @Test
    public void testVectorConversionSingleDenseUnknown() {
        testDataFrameConversion(true, cols1, true, true);
    }

    @Test
    public void testVectorConversionSingleSparse() {
        testDataFrameConversion(true, cols1, false, false);
    }

    @Test
    public void testVectorConversionSingleSparseUnknown() {
        testDataFrameConversion(true, cols1, false, true);
    }

    @Test
    public void testVectorConversionMultiDense() {
        testDataFrameConversion(true, cols2, true, false);
    }

    @Test
    public void testVectorConversionMultiDenseUnknown() {
        testDataFrameConversion(true, cols2, true, true);
    }

    @Test
    public void testVectorConversionMultiSparse() {
        testDataFrameConversion(true, cols2, false, false);
    }

    @Test
    public void testVectorConversionMultiSparseUnknown() {
        testDataFrameConversion(true, cols2, false, true);
    }

    @Test
    public void testRowConversionSingleDense() {
        testDataFrameConversion(false, cols1, true, false);
    }

    @Test
    public void testRowConversionSingleDenseUnknown() {
        testDataFrameConversion(false, cols1, true, true);
    }

    @Test
    public void testRowConversionSingleSparse() {
        testDataFrameConversion(false, cols1, false, false);
    }

    @Test
    public void testRowConversionSingleSparseUnknown() {
        testDataFrameConversion(false, cols1, false, true);
    }

    @Test
    public void testRowConversionMultiDense() {
        testDataFrameConversion(false, cols2, true, false);
    }

    @Test
    public void testRowConversionMultiDenseUnknown() {
        testDataFrameConversion(false, cols2, true, true);
    }

    @Test
    public void testRowConversionMultiSparse() {
        testDataFrameConversion(false, cols2, false, false);
    }

    @Test
    public void testRowConversionMultiSparseUnknown() {
        testDataFrameConversion(false, cols2, false, true);
    }

    @Test
    public void testVectorConversionWideDense() {
        testDataFrameConversion(true, cols3, true, false);
    }

    @Test
    public void testVectorConversionWideDenseUnknown() {
        testDataFrameConversion(true, cols3, true, true);
    }

    @Test
    public void testVectorConversionWideSparse() {
        testDataFrameConversion(true, cols3, false, false);
    }

    @Test
    public void testVectorConversionWideSparseUnknown() {
        testDataFrameConversion(true, cols3, false, true);
    }

    @Test
    public void testVectorConversionMultiUltraSparse() {
        testDataFrameConversionUltraSparse(true, false);
    }

    @Test
    public void testVectorConversionMultiUltraSparseUnknown() {
        testDataFrameConversionUltraSparse(true, true);
    }

    @Test
    public void testRowConversionMultiUltraSparse() {
        testDataFrameConversionUltraSparse(false, false);
    }

    @Test
    public void testRowConversionMultiUltraSparseUnknown() {
        testDataFrameConversionUltraSparse(false, true);
    }

    private void testDataFrameConversion(boolean vector, int cols, boolean dense, boolean unknownDims) {
        boolean oldConfig = DMLScript.USE_LOCAL_SPARK_CONFIG;
        ExecMode oldPlatform = DMLScript.getGlobalExecMode();
        try {
            DMLScript.USE_LOCAL_SPARK_CONFIG = true;
            DMLScript.setGlobalExecMode(ExecMode.HYBRID);
            // generate input data and setup metadata
            int rows = (cols == cols3) ? rows3 : rows1;
            double sparsity = dense ? sparsity1 : sparsity2;
            double[][] A = getRandomMatrix(rows, cols, -10, 10, sparsity, 2373);
            MatrixBlock mbA = DataConverter.convertToMatrixBlock(A);
            int blksz = ConfigurationManager.getBlocksize();
            MatrixCharacteristics mc1 = new MatrixCharacteristics(rows, cols, blksz, mbA.getNonZeros());
            MatrixCharacteristics mc2 = unknownDims ? new MatrixCharacteristics() : new MatrixCharacteristics(mc1);
            // get binary block input rdd
            JavaPairRDD<MatrixIndexes, MatrixBlock> in = SparkExecutionContext.toMatrixJavaPairRDD(sc, mbA, blksz);
            // matrix - dataframe - matrix conversion
            Dataset<Row> df = RDDConverterUtils.binaryBlockToDataFrame(spark, in, mc1, vector);
            df = (rows == rows3) ? df.repartition(rows) : df;
            JavaPairRDD<MatrixIndexes, MatrixBlock> out = RDDConverterUtils.dataFrameToBinaryBlock(sc, df, mc2, true, vector);
            // get output matrix block
            MatrixBlock mbB = SparkExecutionContext.toMatrixBlock(out, rows, cols, blksz, -1);
            // compare matrix blocks
            double[][] B = DataConverter.convertToDoubleMatrix(mbB);
            TestUtils.compareMatrices(A, B, rows, cols, eps);
        } catch (Exception ex) {
            throw new RuntimeException(ex);
        } finally {
            DMLScript.USE_LOCAL_SPARK_CONFIG = oldConfig;
            DMLScript.setGlobalExecMode(oldPlatform);
        }
    }

    private void testDataFrameConversionUltraSparse(boolean vector, boolean unknownDims) {
        boolean oldConfig = DMLScript.USE_LOCAL_SPARK_CONFIG;
        ExecMode oldPlatform = DMLScript.getGlobalExecMode();
        try {
            DMLScript.USE_LOCAL_SPARK_CONFIG = true;
            DMLScript.setGlobalExecMode(ExecMode.HYBRID);
            // generate input data and setup metadata
            double[][] A = getRandomMatrix(rows1, 1, -10, 10, 0.7, 2373);
            MatrixBlock mbA0 = DataConverter.convertToMatrixBlock(A);
            MatrixBlock mbA = LibMatrixReorg.diag(mbA0, new MatrixBlock(rows1, rows1, true));
            int blksz = ConfigurationManager.getBlocksize();
            MatrixCharacteristics mc1 = new MatrixCharacteristics(rows1, rows1, blksz, mbA.getNonZeros());
            MatrixCharacteristics mc2 = unknownDims ? new MatrixCharacteristics() : new MatrixCharacteristics(mc1);
            // get binary block input rdd
            JavaPairRDD<MatrixIndexes, MatrixBlock> in = SparkExecutionContext.toMatrixJavaPairRDD(sc, mbA, blksz);
            // matrix - dataframe - matrix conversion
            Dataset<Row> df = RDDConverterUtils.binaryBlockToDataFrame(spark, in, mc1, vector);
            JavaPairRDD<MatrixIndexes, MatrixBlock> out = RDDConverterUtils.dataFrameToBinaryBlock(sc, df, mc2, true, vector);
            // get output matrix block
            MatrixBlock mbB0 = SparkExecutionContext.toMatrixBlock(out, rows1, rows1, blksz, -1);
            MatrixBlock mbB = LibMatrixReorg.diag(mbB0, new MatrixBlock(rows1, 1, false));
            // compare matrix blocks
            double[][] B = DataConverter.convertToDoubleMatrix(mbB);
            TestUtils.compareMatrices(A, B, rows1, 1, eps);
        } catch (Exception ex) {
            throw new RuntimeException(ex);
        } finally {
            DMLScript.USE_LOCAL_SPARK_CONFIG = oldConfig;
            DMLScript.setGlobalExecMode(oldPlatform);
        }
    }

    @AfterClass
    public static void tearDownClass() {
        // stop underlying spark context to allow single jvm tests (otherwise the
        // next test that tries to create a SparkContext would fail)
        spark.stop();
        sc = null;
        spark = null;
        LazyWriteBuffer.cleanup();
    }
}

19 Source : RDDConverterUtilsExt.java
with Apache License 2.0
from tugraz-isds

public static JavaPairRDD<MatrixIndexes, MatrixBlock> coordinateMatrixToBinaryBlock(JavaSparkContext sc, CoordinateMatrix input, DataCharacteristics mcIn, boolean outputEmptyBlocks) {
    // convert matrix entry rdd to binary block rdd (w/ partial blocks)
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = input.entries().toJavaRDD().mapPartitionsToPair(new MatrixEntryToBinaryBlockFunction(mcIn));
    // inject empty blocks (if necessary)
    if (outputEmptyBlocks && mcIn.mightHaveEmptyBlocks()) {
        out = out.union(SparkUtils.getEmptyBlockRDD(sc, mcIn));
    }
    // aggregate partial matrix blocks
    out = RDDAggregateUtils.mergeByKey(out, false);
    return out;
}

19 Source : RDDConverterUtils.java
with Apache License 2.0
from tugraz-isds

public static JavaPairRDD<MatrixIndexes, MatrixBlock> textCellToBinaryBlock(JavaSparkContext sc, JavaPairRDD<LongWritable, Text> input, DataCharacteristics mcOut, boolean outputEmptyBlocks, FileFormatPropertiesMM mmProps) {
    // convert textcell rdd to binary block rdd (w/ partial blocks)
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = input.values().mapPartitionsToPair(new TextToBinaryBlockFunction(mcOut, mmProps));
    // inject empty blocks (if necessary)
    if (outputEmptyBlocks && mcOut.mightHaveEmptyBlocks()) {
        out = out.union(SparkUtils.getEmptyBlockRDD(sc, mcOut));
    }
    // aggregate partial matrix blocks
    out = RDDAggregateUtils.mergeByKey(out, false);
    return out;
}

19 Source : RDDConverterUtils.java
with Apache License 2.0
from tugraz-isds

/**
 * Converts a libsvm text input file into two binary block matrices for features
 * and labels, and saves these to the specified output files. This call also deletes
 * existing files at the specified output locations, as well as determines and
 * writes the meta data files of both output matrices.
 * <p>
 * Note: We use {@code org.apache.spark.mllib.util.MLUtils.loadLibSVMFile} for parsing
 * the libsvm input files in order to ensure consistency with Spark.
 *
 * @param sc java spark context
 * @param pathIn path to libsvm input file
 * @param pathX path to binary block output file of features
 * @param pathY path to binary block output file of labels
 * @param mcOutX matrix characteristics of output matrix X
 */
public static void libsvmToBinaryBlock(JavaSparkContext sc, String pathIn, String pathX, String pathY, DataCharacteristics mcOutX) {
    if (!mcOutX.dimsKnown())
        throw new DMLRuntimeException("Matrix characteristics " + "required to convert sparse input representation.");
    try {
        // cleanup existing output files
        HDFSTool.deleteFileIfExistOnHDFS(pathX);
        HDFSTool.deleteFileIfExistOnHDFS(pathY);
        // convert libsvm to labeled points
        int numFeatures = (int) mcOutX.getCols();
        int numPartitions = SparkUtils.getNumPreferredPartitions(mcOutX, null);
        JavaRDD<org.apache.spark.mllib.regression.LabeledPoint> lpoints = MLUtils.loadLibSVMFile(sc.sc(), pathIn, numFeatures, numPartitions).toJavaRDD();
        // append row index and best-effort caching to avoid repeated text parsing
        JavaPairRDD<org.apache.spark.mllib.regression.LabeledPoint, Long> ilpoints = lpoints.zipWithIndex().persist(StorageLevel.MEMORY_AND_DISK());
        // extract labels and convert to binary block
        DataCharacteristics mc1 = new MatrixCharacteristics(mcOutX.getRows(), 1, mcOutX.getBlocksize(), -1);
        LongAccumulator aNnz1 = sc.sc().longAccumulator("nnz");
        JavaPairRDD<MatrixIndexes, MatrixBlock> out1 = ilpoints.mapPartitionsToPair(new LabeledPointToBinaryBlockFunction(mc1, true, aNnz1));
        int numPartitions2 = SparkUtils.getNumPreferredPartitions(mc1, null);
        out1 = RDDAggregateUtils.mergeByKey(out1, numPartitions2, false);
        out1.saveAsHadoopFile(pathY, MatrixIndexes.class, MatrixBlock.class, SequenceFileOutputFormat.class);
        // update nnz after triggered save
        mc1.setNonZeros(aNnz1.value());
        HDFSTool.writeMetaDataFile(pathY + ".mtd", ValueType.FP64, mc1, OutputInfo.BinaryBlockOutputInfo);
        // extract data and convert to binary block
        DataCharacteristics mc2 = new MatrixCharacteristics(mcOutX.getRows(), mcOutX.getCols(), mcOutX.getBlocksize(), -1);
        LongAccumulator aNnz2 = sc.sc().longAccumulator("nnz");
        JavaPairRDD<MatrixIndexes, MatrixBlock> out2 = ilpoints.mapPartitionsToPair(new LabeledPointToBinaryBlockFunction(mc2, false, aNnz2));
        out2 = RDDAggregateUtils.mergeByKey(out2, numPartitions, false);
        out2.saveAsHadoopFile(pathX, MatrixIndexes.class, MatrixBlock.class, SequenceFileOutputFormat.class);
        // update nnz after triggered save
        mc2.setNonZeros(aNnz2.value());
        HDFSTool.writeMetaDataFile(pathX + ".mtd", ValueType.FP64, mc2, OutputInfo.BinaryBlockOutputInfo);
        // asynchronous cleanup of cached intermediates
        ilpoints.unpersist(false);
    } catch (IOException ex) {
        throw new DMLRuntimeException(ex);
    }
}
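
The converter above delegates the parsing to Spark's MLUtils.loadLibSVMFile, as its javadoc notes. Below is a small, self-contained sketch of that underlying call only; the SystemDS-specific output paths, block conversion, and metadata handling are omitted, and the input path is a placeholder.

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.mllib.regression.LabeledPoint;
import org.apache.spark.mllib.util.MLUtils;

public class LibsvmLoadSketch {

    public static void main(String[] args) {
        JavaSparkContext sc = new JavaSparkContext(
            new SparkConf().setAppName("libsvmLoad").setMaster("local[*]"));
        // each input line looks like: "<label> <index1>:<value1> <index2>:<value2> ..."
        JavaRDD<LabeledPoint> points =
            MLUtils.loadLibSVMFile(sc.sc(), "data/sample_libsvm_data.txt").toJavaRDD();
        System.out.println("rows: " + points.count()
            + ", features: " + points.first().features().size());
        sc.stop();
    }
}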

19 Source : RDDConverterUtils.java
with Apache License 2.0
from tugraz-isds

public static JavaPairRDD<MatrixIndexes, MatrixBlock> csvToBinaryBlock(JavaSparkContext sc, JavaRDD<String> input, DataCharacteristics mcOut, boolean hasHeader, String delim, boolean fill, double fillValue) {
    // convert string rdd to serializable longwritable/text
    JavaPairRDD<LongWritable, Text> prepinput = input.mapToPair(new StringToSerTextFunction());
    // convert to binary block
    return csvToBinaryBlock(sc, prepinput, mcOut, hasHeader, delim, fill, fillValue);
}
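
A hedged caller sketch for the String-RDD variant above: the Spark calls are standard, while the commented-out MatrixCharacteristics construction and converter call mirror the signatures shown in these examples and are assumptions.

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

import java.util.Arrays;

public class CsvToBinaryBlockSketch {

    public static void main(String[] args) {
        JavaSparkContext sc = new JavaSparkContext(
            new SparkConf().setAppName("csvToBinaryBlock").setMaster("local[*]"));
        // a toy 2 x 3 CSV matrix, one line per row
        JavaRDD<String> csv = sc.parallelize(Arrays.asList(
            "1.0,2.0,3.0",
            "4.0,5.0,6.0"));
        // assumed construction and call, following the signatures above:
        // DataCharacteristics mcOut = new MatrixCharacteristics(2, 3, 1000, -1);
        // JavaPairRDD<MatrixIndexes, MatrixBlock> blocks = RDDConverterUtils
        //     .csvToBinaryBlock(sc, csv, mcOut, false, ",", false, 0.0);
        sc.stop();
    }
}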

19 Source : RDDConverterUtils.java
with Apache License 2.0
from tugraz-isds

public static JavaPairRDD<MatrixIndexes, MatrixBlock> binaryCellToBinaryBlock(JavaSparkContext sc, JavaPairRDD<MatrixIndexes, MatrixCell> input, DataCharacteristics mcOut, boolean outputEmptyBlocks) {
    // convert binarycell rdd to binary block rdd (w/ partial blocks)
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = input.mapPartitionsToPair(new BinaryCellToBinaryBlockFunction(mcOut));
    // inject empty blocks (if necessary)
    if (outputEmptyBlocks && mcOut.mightHaveEmptyBlocks()) {
        out = out.union(SparkUtils.getEmptyBlockRDD(sc, mcOut));
    }
    // aggregate partial matrix blocks
    out = RDDAggregateUtils.mergeByKey(out, false);
    return out;
}

19 Source : RDDConverterUtils.java
with Apache License 2.0
from tugraz-isds

public static JavaPairRDD<MatrixIndexes, MatrixBlock> dataFrameToBinaryBlock(JavaSparkContext sc, Dataset<Row> df, DataCharacteristics mc, boolean containsID, boolean isVector) {
    // determine unknown dimensions and sparsity if required
    if (!mc.dimsKnown(true)) {
        LongAccumulator aNnz = sc.sc().longAccumulator("nnz");
        JavaRDD<Row> tmp = df.javaRDD().map(new DataFrameAnalysisFunction(aNnz, containsID, isVector));
        long rlen = tmp.count();
        long clen = !isVector ? df.columns().length - (containsID ? 1 : 0) : ((Vector) tmp.first().get(containsID ? 1 : 0)).size();
        long nnz = UtilFunctions.toLong(aNnz.value());
        mc.set(rlen, clen, mc.getBlocksize(), nnz);
    }
    // ensure valid blocksizes
    if (mc.getBlocksize() <= 1)
        mc.setBlocksize(ConfigurationManager.getBlocksize());
    // construct or reuse row ids
    JavaPairRDD<Row, Long> prepinput = containsID ? df.javaRDD().mapToPair(new DataFrameExtractIDFunction(df.schema().fieldIndex(DF_ID_COLUMN))) : // zip row index
    df.javaRDD().zipWithIndex();
    // convert csv rdd to binary block rdd (w/ partial blocks)
    boolean sparse = requiresSparseAllocation(prepinput, mc);
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = prepinput.mapPartitionsToPair(new DataFrameToBinaryBlockFunction(mc, sparse, containsID, isVector));
    // aggregate partial matrix blocks (w/ preferred number of output
    // partitions as the data is likely smaller in binary block format,
    // but also to bound the size of partitions for compressed inputs)
    int parts = SparkUtils.getNumPreferredPartitions(mc, out);
    return RDDAggregateUtils.mergeByKey(out, parts, false);
}
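
A hedged sketch of preparing a Dataset<Row> input for the converter above. Only the SparkSession code is standard API; the commented call to dataFrameToBinaryBlock mirrors the signature shown and is an assumption.

import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class DataFrameToBinaryBlockSketch {

    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
            .appName("dfToBinaryBlock").master("local[*]").getOrCreate();
        JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
        // a toy numeric data frame with a single long column "id"
        Dataset<Row> df = spark.range(0, 100).toDF();
        // assumed construction and call, following the signatures above:
        // DataCharacteristics mc = new MatrixCharacteristics(100, 1, 1000, -1);
        // JavaPairRDD<MatrixIndexes, MatrixBlock> blocks =
        //     RDDConverterUtils.dataFrameToBinaryBlock(sc, df, mc, false, false);
        spark.stop();
    }
}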

19 Source : RDDConverterUtils.java
with Apache License 2.0
from tugraz-isds

public static JavaPairRDD<MatrixIndexes, MatrixBlock> csvToBinaryBlock(JavaSparkContext sc, JavaPairRDD<LongWritable, Text> input, DataCharacteristics mc, boolean hasHeader, String delim, boolean fill, double fillValue) {
    // determine unknown dimensions and sparsity if required
    // (w/ robustness for mistakenly counted header in nnz)
    if (!mc.dimsKnown(true)) {
        LongAccumulator aNnz = sc.sc().longAccumulator("nnz");
        JavaRDD<String> tmp = input.values().map(new CSVAnalysisFunction(aNnz, delim));
        long rlen = tmp.count() - (hasHeader ? 1 : 0);
        long clen = tmp.first().split(delim).length;
        long nnz = Math.min(rlen * clen, UtilFunctions.toLong(aNnz.value()));
        mc.set(rlen, clen, mc.getBlocksize(), nnz);
    }
    // prepare csv w/ row indexes (sorted by filenames)
    JavaPairRDD<Text, Long> prepinput = input.values().zipWithIndex();
    // convert csv rdd to binary block rdd (w/ partial blocks)
    boolean sparse = requiresSparseAllocation(prepinput, mc);
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = prepinput.mapPartitionsToPair(new CSVToBinaryBlockFunction(mc, sparse, hasHeader, delim, fill, fillValue));
    // aggregate partial matrix blocks (w/ preferred number of output
    // partitions as the data is likely smaller in binary block format,
    // but also to bound the size of partitions for compressed inputs)
    int parts = SparkUtils.getNumPreferredPartitions(mc, out);
    return RDDAggregateUtils.mergeByKey(out, parts, false);
}

19 Source : FrameRDDConverterUtils.java
with Apache License 2.0
from tugraz-isds

// =====================================
// Text cell <--> Binary block
public static JavaPairRDD<Long, FrameBlock> textCellToBinaryBlock(JavaSparkContext sc, JavaPairRDD<LongWritable, Text> in, DataCharacteristics mcOut, ValueType[] schema) {
    // convert input rdd to serializable long/frame block
    JavaPairRDD<Long, Text> input = in.mapToPair(new LongWritableTextToLongTextFunction());
    // do actual conversion
    return textCellToBinaryBlockLongIndex(sc, input, mcOut, schema);
}

19 Source : FrameRDDConverterUtils.java
with Apache License 2.0
from tugraz-isds

// =====================================
// DataFrame <--> Binary block
public static JavaPairRDD<Long, FrameBlock> dataFrameToBinaryBlock(JavaSparkContext sc, Dataset<Row> df, DataCharacteristics mc, boolean containsID) {
    return dataFrameToBinaryBlock(sc, df, mc, containsID, new Pair<String[], ValueType[]>());
}

19 Source : FrameRDDConverterUtils.java
with Apache License 2.0
from tugraz-isds

public static JavaPairRDD<Long, FrameBlock> textCellToBinaryBlockLongIndex(JavaSparkContext sc, JavaPairRDD<Long, Text> input, DataCharacteristics mc, ValueType[] schema) {
    // prepare default schema if needed
    if (schema == null || schema.length == 1) {
        schema = UtilFunctions.nCopies((int) mc.getCols(), (schema != null) ? schema[0] : ValueType.STRING);
    }
    // convert textcell rdd to binary block rdd (w/ partial blocks)
    JavaPairRDD<Long, FrameBlock> output = input.values().mapPartitionsToPair(new TextToBinaryBlockFunction(mc, schema));
    // aggregate partial matrix blocks
    return FrameRDDAggregateUtils.mergeByKey(output);
}

19 Source : FrameRDDConverterUtils.java
with Apache License 2.0
from tugraz-isds

public static JavaPairRDD<Long, FrameBlock> dataFrameToBinaryBlock(JavaSparkContext sc, Dataset<Row> df, DataCharacteristics mc, boolean containsID, Pair<String[], ValueType[]> out) {
    // determine unknown dimensions if required
    if (!mc.dimsKnown()) {
        // nnz are irrelevant here
        int colVect = getColVectFromDFSchema(df.schema(), containsID);
        int off = (containsID ? 1 : 0);
        long rlen = df.count();
        long clen = df.columns().length - off + ((colVect >= 0) ? ((Vector) df.first().get(off + colVect)).size() - 1 : 0);
        mc.set(rlen, clen, mc.getBlocksize(), -1);
    }
    // append or reuse row index column
    JavaPairRDD<Row, Long> prepinput = containsID ? df.javaRDD().mapToPair(new DataFrameExtractIDFunction(df.schema().fieldIndex(RDDConverterUtils.DF_ID_COLUMN))) : // zip row index
    df.javaRDD().zipWithIndex();
    // convert data frame to frame schema (prepare once)
    String[] colnames = new String[(int) mc.getCols()];
    ValueType[] fschema = new ValueType[(int) mc.getCols()];
    int colVect = convertDFSchemaToFrameSchema(df.schema(), colnames, fschema, containsID);
    // make schema available
    out.set(colnames, fschema);
    // convert rdd to binary block rdd
    return prepinput.mapPartitionsToPair(new DataFrameToBinaryBlockFunction(mc, colnames, fschema, containsID, colVect));
}

19 Source : FrameRDDConverterUtils.java
with Apache License 2.0
from tugraz-isds

public static JavaPairRDD<Long, FrameBlock> matrixBlockToBinaryBlockLongIndex(JavaSparkContext sc, JavaPairRDD<MatrixIndexes, MatrixBlock> input, DataCharacteristics dcIn) {
    JavaPairRDD<MatrixIndexes, MatrixBlock> in = input;
    DataCharacteristics mc = new MatrixCharacteristics(dcIn);
    // reblock matrix blocks if required (multiple column blocks)
    if (dcIn.getCols() > dcIn.getBlocksize()) {
        // split matrix blocks into extended matrix blocks
        in = in.flatMapToPair(new MatrixFrameReblockFunction(dcIn));
        mc.setBlocksize(MatrixFrameReblockFunction.computeBlockSize(mc));
        // shuffle matrix blocks (instead of frame blocks) in order to exploit
        // sparse formats (for sparse or wide matrices) during shuffle
        in = RDDAggregateUtils.mergeByKey(in, false);
    }
    // convert individual matrix blocks to frame blocks (w/o shuffle)
    return in.mapToPair(new MatrixToFrameBlockFunction(mc));
}

19 Source : FrameRDDConverterUtils.java
with Apache License 2.0
from tugraz-isds

public static JavaPairRDD<Long, FrameBlock> csvToBinaryBlock(JavaSparkContext sc, JavaRDD<String> input, DataCharacteristics mcOut, ValueType[] schema, boolean hasHeader, String delim, boolean fill, double fillValue) {
    // convert string rdd to serializable longwritable/text
    JavaPairRDD<LongWritable, Text> prepinput = input.mapToPair(new StringToSerTextFunction());
    // convert to binary block
    return csvToBinaryBlock(sc, prepinput, mcOut, schema, hasHeader, delim, fill, fillValue);
}

19 Source : FrameRDDConverterUtils.java
with Apache License 2.0
from tugraz-isds

/*
 * Returns a JavaRDD<Row> created from a CSV data input file.
 */
public static JavaRDD<Row> csvToRowRDD(JavaSparkContext sc, String fnameIn, String delim, ValueType[] schema) {
    // Load a text file and convert each line to a java rdd.
    JavaRDD<String> dataRdd = sc.textFile(fnameIn);
    return dataRdd.map(new RowGenerator(schema, delim));
}
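
A hedged sketch of how such a Row RDD can be turned into a DataFrame with Spark's public API. The CSV-to-Row mapping is written inline here as a stand-in for the project's RowGenerator, and the hand-built two-column double schema is an assumption.

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructType;

import java.util.Arrays;

public class CsvRowsToDataFrameSketch {

    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
            .appName("csvRows").master("local[*]").getOrCreate();
        JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());
        // stand-in for csvToRowRDD(...): parse each CSV line into a Row of doubles
        JavaRDD<Row> rows = jsc.parallelize(Arrays.asList("1.0,2.0", "3.0,4.0"))
            .map(line -> {
                String[] p = line.split(",");
                return RowFactory.create(Double.parseDouble(p[0]), Double.parseDouble(p[1]));
            });
        // hand-built schema for the two columns (assumed, replaces the ValueType[] mapping)
        StructType schema = DataTypes.createStructType(Arrays.asList(
            DataTypes.createStructField("c1", DataTypes.DoubleType, false),
            DataTypes.createStructField("c2", DataTypes.DoubleType, false)));
        Dataset<Row> df = spark.createDataFrame(rows, schema);
        df.show();
        spark.stop();
    }
}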

19 Source : FrameRDDConverterUtils.java
with Apache License 2.0
from tugraz-isds

// =====================================
// Matrix block <--> Binary block
public static JavaPairRDD<LongWritable, FrameBlock> matrixBlockToBinaryBlock(JavaSparkContext sc, JavaPairRDD<MatrixIndexes, MatrixBlock> input, DataCharacteristics mcIn) {
    // convert and map to serializable LongWritable/frame block
    return matrixBlockToBinaryBlockLongIndex(sc, input, mcIn).mapToPair(new LongFrameToLongWritableFrameFunction());
}

19 Source : FrameRDDConverterUtils.java
with Apache License 2.0
from tugraz-isds

// =====================================
// CSV <--> Binary block
public static JavaPairRDD<Long, FrameBlock> csvToBinaryBlock(JavaSparkContext sc, JavaPairRDD<LongWritable, Text> input, DataCharacteristics mc, ValueType[] schema, boolean hasHeader, String delim, boolean fill, double fillValue) {
    // determine unknown dimensions and sparsity if required
    if (!mc.dimsKnown()) {
        // nnz irrelevant here
        JavaRDD<String> tmp = input.values().map(new TextToStringFunction());
        String tmpStr = tmp.first();
        boolean metaHeader = tmpStr.startsWith(TfUtils.TXMTD_MVPREFIX) || tmpStr.startsWith(TfUtils.TXMTD_NDPREFIX);
        tmpStr = (metaHeader) ? tmpStr.substring(tmpStr.indexOf(delim) + 1) : tmpStr;
        long rlen = tmp.count() - (hasHeader ? 1 : 0) - (metaHeader ? 2 : 0);
        long clen = IOUtilFunctions.splitCSV(tmpStr, delim).length;
        mc.set(rlen, clen, mc.getBlocksize(), -1);
    }
    // prepare csv w/ row indexes (sorted by filenames)
    JavaPairRDD<Text, Long> prepinput = input.values().zipWithIndex();
    // prepare default schema if needed
    if (schema == null || schema.length == 1)
        schema = UtilFunctions.nCopies((int) mc.getCols(), ValueType.STRING);
    // convert csv rdd to binary block rdd (w/ partial blocks)
    JavaPairRDD<Long, FrameBlock> out = prepinput.mapPartitionsToPair(new CSVToBinaryBlockFunction(mc, schema, hasHeader, delim));
    return out;
}

19 Source : FrameRDDConverterUtils.java
with Apache License 2.0
from tugraz-isds

/*
 * Returns a JavaRDD<Row> created from CSV data.
 */
public static JavaRDD<Row> csvToRowRDD(JavaSparkContext sc, JavaRDD<String> dataRdd, String delim, ValueType[] schema) {
    // Convert each line to a java rdd.
    return dataRdd.map(new RowGenerator(schema, delim));
}

19 Source : MultiReturnParameterizedBuiltinSPInstruction.java
with Apache License 2.0
from tugraz-isds

private static MaxLongAccumulator registerMaxLongAccumulator(JavaSparkContext sc) {
    MaxLongAccumulator acc = new MaxLongAccumulator(Long.MIN_VALUE);
    sc.sc().register(acc, "max");
    return acc;
}
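
The MaxLongAccumulator class itself is not part of this listing, so the sketch below is an assumption about its shape: a self-contained "max" accumulator built on Spark's AccumulatorV2 API, registered with the same sc.sc().register(acc, "max") pattern as above.

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.util.AccumulatorV2;

import java.util.Arrays;

public class MaxAccumulatorSketch {

    // minimal max-tracking accumulator (assumed stand-in for the project's MaxLongAccumulator)
    public static class LongMaxAccumulator extends AccumulatorV2<Long, Long> {

        private long max = Long.MIN_VALUE;

        @Override public boolean isZero() { return max == Long.MIN_VALUE; }

        @Override public AccumulatorV2<Long, Long> copy() {
            LongMaxAccumulator c = new LongMaxAccumulator();
            c.max = max;
            return c;
        }

        @Override public void reset() { max = Long.MIN_VALUE; }

        @Override public void add(Long v) { max = Math.max(max, v); }

        @Override public void merge(AccumulatorV2<Long, Long> other) { max = Math.max(max, other.value()); }

        @Override public Long value() { return max; }
    }

    public static void main(String[] args) {
        JavaSparkContext sc = new JavaSparkContext(
            new SparkConf().setAppName("maxAcc").setMaster("local[*]"));
        LongMaxAccumulator acc = new LongMaxAccumulator();
        // same registration pattern as registerMaxLongAccumulator above
        sc.sc().register(acc, "max");
        sc.parallelize(Arrays.asList(3L, 7L, 5L)).foreach(acc::add);
        System.out.println("max = " + acc.value()); // 7
        sc.stop();
    }
}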

19 Source : RemoteDPParForSpark.java
with Apache License 2.0
from tugraz-isds

public static RemoteParForJobReturn runJob(long pfid, String itervar, String matrixvar, String program, HashMap<String, byte[]> clsMap, String resultFile, MatrixObject input, ExecutionContext ec, PartitionFormat dpf, OutputInfo oi, boolean tSparseCol, boolean enableCPCaching, int numReducers) {
    String jobname = "ParFor-DPESP";
    long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    JavaSparkContext sc = sec.getSparkContext();
    // prepare input parameters
    MatrixObject mo = sec.getMatrixObject(matrixvar);
    DataCharacteristics mc = mo.getDataCharacteristics();
    // initialize accumulators for tasks/iterations, and inputs
    JavaPairRDD<MatrixIndexes, MatrixBlock> in = sec.getBinaryMatrixBlockRDDHandleForVariable(matrixvar);
    LongAccumulator aTasks = sc.sc().longAccumulator("tasks");
    LongAccumulator aIters = sc.sc().longAccumulator("iterations");
    // compute number of reducers (to avoid OOMs and reduce memory pressure)
    int numParts = SparkUtils.getNumPreferredPartitions(mc, in);
    int numReducers2 = Math.max(numReducers, Math.min(numParts, (int) dpf.getNumParts(mc)));
    // core parfor data-partition-execute (w/ or w/o shuffle, depending on data characteristics)
    RemoteDPParForSparkWorker efun = new RemoteDPParForSparkWorker(program, clsMap, matrixvar, itervar, enableCPCaching, mc, tSparseCol, dpf, oi, aTasks, aIters);
    JavaPairRDD<Long, Writable> tmp = getPartitionedInput(sec, matrixvar, oi, dpf);
    List<Tuple2<Long, String>> out = (requiresGrouping(dpf, mo) ? tmp.groupByKey(numReducers2) : tmp.map(new PseudoGrouping())).mapPartitionsToPair(// execute parfor tasks, incl cleanup
    efun).collect();
    // de-serialize results
    LocalVariableMap[] results = RemoteParForUtils.getResults(out, LOG);
    // get accumulator value
    int numTasks = aTasks.value().intValue();
    // get accumulator value
    int numIters = aIters.value().intValue();
    // create output symbol table entries
    RemoteParForJobReturn ret = new RemoteParForJobReturn(true, numTasks, numIters, results);
    // maintain statistics
    Statistics.incrementNoOfCompiledSPInst();
    Statistics.incrementNoOfExecutedSPInst();
    if (DMLScript.STATISTICS) {
        Statistics.maintainCPHeavyHitters(jobname, System.nanoTime() - t0);
    }
    return ret;
}
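
The aTasks/aIters counters above follow Spark's standard driver-side accumulator pattern: create a LongAccumulator on the driver, add to it inside a transformation, and read it only after an action has run. A minimal standalone illustration (class and variable names are illustrative only):

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.util.LongAccumulator;

import java.util.Arrays;

public class AccumulatorPatternSketch {

    public static void main(String[] args) {
        JavaSparkContext sc = new JavaSparkContext(
            new SparkConf().setAppName("accPattern").setMaster("local[*]"));
        LongAccumulator processed = sc.sc().longAccumulator("processed");
        // count elements on the executors while still returning the mapped data
        long sum = sc.parallelize(Arrays.asList(1, 2, 3, 4))
            .map(x -> { processed.add(1); return x * 2; })
            .reduce(Integer::sum);
        // accumulator values are only reliable after an action has run
        System.out.println("sum=" + sum + ", processed=" + processed.value());
        sc.stop();
    }
}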

19 Source : VectorOpsMain.java
with Apache License 2.0
from tudorv91

public class VectorOpsMain {

    private static JavaSparkContext jscSingleton;

    private static String nativePath = null;

    private static String appName = "vectorOps";

    private static final boolean debug = true;

    private static void initSparkJNI() {
        nativePath = Paths.get("sparkjni-examples/src/main/cpp/examples/vectorOps").normalize().toAbsolutePath().toString();
        appName = "vectorOps";
        String sparkjniClasspath = FileSystems.getDefault().getPath("core/target/classes").toAbsolutePath().normalize().toString();
        String examplesClasspath = FileSystems.getDefault().getPath("sparkjni-examples/target/classes").toAbsolutePath().normalize().toString();
        SparkJni sparkJni = new SparkJniBuilder().nativePath(nativePath).appName(appName).build();
        sparkJni.setDeployMode(new DeployMode(JUST_BUILD)).addToClasspath(sparkjniClasspath, examplesClasspath);
        sparkJni.registerContainer(VectorBean.class).registerJniFunction(VectorMulJni.class).registerJniFunction(VectorAddJni.class);
        sparkJni.deploy();
    }

    private static JavaSparkContext getSparkContext() {
        if (jscSingleton == null) {
            SparkConf sparkConf = new SparkConf().setAppName(appName);
            sparkConf.setMaster("local[4]");
            jscSingleton = new JavaSparkContext(sparkConf);
        }
        return jscSingleton;
    }

    private static ArrayList<VectorBean> generateVectors(int noVectors, int vectorSize) {
        ArrayList<VectorBean> vectors = new ArrayList<>();
        for (int i = 0; i < noVectors; i++) {
            int[] data = new int[vectorSize];
            if (debug)
                System.out.println(String.format("Vector %d:", i));
            for (int idx = 0; idx < vectorSize; idx++) {
                data[idx] = (int) (Math.random() * 1000);
                if (debug)
                    System.out.println(String.format("idx %d: %d", idx, data[idx]));
            }
            vectors.add(new VectorBean(data));
        }
        return vectors;
    }

    public static void main(String[] args) {
        initSparkJNI();
        String libPath = String.format(CppSyntax.NATIVE_LIB_PATH, nativePath, appName);
        JavaRDD<VectorBean> vectorsRdd = getSparkContext().parallelize(generateVectors(2, 4));
        JavaRDD<VectorBean> mulResults = vectorsRdd.map(new VectorMulJni(libPath, "mapVectorMul"));
        VectorBean results = mulResults.reduce(new VectorAddJni(libPath, "reduceVectorAdd"));
        debugRes(results);
    }

    private static void debugRes(VectorBean vector) {
        if (debug) {
            System.out.println("Result:");
            for (int i = 0; i < vector.data.length; i++) System.out.println(vector.data[i]);
        }
    }
}

19 Source : ExampleUtils.java
with Apache License 2.0
from tudorv91

public class ExampleUtils {

    private static JavaSparkContext jscSingleton;

    public static ArrayList<VectorBean> generateVectors(int noVectors, int vectorSize, boolean debug) {
        ArrayList<VectorBean> vectors = new ArrayList<>();
        for (int i = 0; i < noVectors; i++) {
            int[] data = new int[vectorSize];
            if (debug)
                System.out.println(String.format("Vector %d:", i));
            for (int idx = 0; idx < vectorSize; idx++) {
                data[idx] = (int) (Math.random() * 1000);
                if (debug)
                    System.out.println(String.format("idx %d: %d", idx, data[idx]));
            }
            vectors.add(new VectorBean(data));
        }
        return vectors;
    }

    public static JavaSparkContext getSparkContext(String appName) {
        if (jscSingleton == null) {
            SparkConf sparkConf = new SparkConf().setAppName(appName);
            sparkConf.setMaster("local[*]");
            sparkConf.set("spark.driver.maxResultSize", "16g");
            jscSingleton = new JavaSparkContext(sparkConf);
        }
        return jscSingleton;
    }

    public static void initSparkJNI(String appName, String nativePath) {
        String sparkjniClasspath = FileSystems.getDefault().getPath("core/target/classes").toAbsolutePath().normalize().toString();
        String examplesClasspath = FileSystems.getDefault().getPath("sparkjni-examples/target/classes").toAbsolutePath().normalize().toString();
        SparkJni sparkJni = new SparkJniBuilder().nativePath(nativePath).appName(appName).build();
        sparkJni.setDeployMode(new DeployMode(JUST_BUILD)).addToClasspath(sparkjniClasspath, examplesClasspath);
        sparkJni.registerContainer(VectorBean.class).registerJniFunction(VectorMulJni.class).registerJniFunction(VectorAddJni.class);
        sparkJni.deploy();
    }
}

19 Source : GeneratorVectorOpsMain.java
with Apache License 2.0
from tudorv91

public class GeneratorVectorOpsMain {

    private static JavaSparkContext jscSingleton;

    private static String appName = "vectorOps";

    private static boolean debug = true;

    public static JavaSparkContext getSparkContext() {
        if (jscSingleton == null) {
            SparkConf sparkConf = new SparkConf().setAppName(appName);
            sparkConf.setMaster("local[4]");
            jscSingleton = new JavaSparkContext(sparkConf);
        }
        return jscSingleton;
    }

    public static ArrayList<VectorBean> generateVectors(int noVectors, int vectorSize) {
        ArrayList<VectorBean> vectors = new ArrayList<VectorBean>();
        for (int i = 0; i < noVectors; i++) {
            int[] data = new int[vectorSize];
            if (debug)
                System.out.println(String.format("Vector %d:", i));
            for (int idx = 0; idx < vectorSize; idx++) {
                data[idx] = (int) (Math.random() * 1000);
                if (debug)
                    System.out.println(String.format("idx %d: %d", idx, data[idx]));
            }
            vectors.add(new VectorBean(data));
        }
        return vectors;
    }

    public static void main(String[] args) {
        String nativePath = Paths.get("src/test/resources/vectorOps/src/main/resources/vectorOps").toAbsolutePath().toString();
        String relativeLibPath = String.format(CppSyntax.NATIVE_LIB_PATH, nativePath, appName);
        String absLibPath = new File(relativeLibPath).toPath().toAbsolutePath().toString();
        JavaRDD<VectorBean> vectorsRdd = getSparkContext().parallelize(generateVectors(2, 4));
        JavaRDD<VectorBean> mulResults = vectorsRdd.map(new VectorMulJni(absLibPath, "mapVectorMul"));
        VectorBean results = mulResults.reduce(new VectorAddJni(absLibPath, "reduceVectorAdd"));
        debugRes(results);
    }

    private static void debugRes(VectorBean vector) {
        if (debug) {
            System.out.println("Result:");
            for (int i = 0; i < vector.data.length; i++) System.out.println(vector.data[i]);
        }
    }
}

19 Source : TestUtils.java
with Apache License 2.0
from tudorv91

public class TestUtils {

    public static final String CLUSTER_CONF_LOCAL_4 = "local[4]";

    public String defaultTestFolder = "resources/%s";

    public File testDir;

    public String fullPath;

    public String appName;

    private static JavaSparkContext jscSingleton = null;

    public TestUtils(Class callerClass) {
        appName = callerClass.getSimpleName();
        defaultTestFolder = String.format(defaultTestFolder, appName + "_TEST");
        initTestDir();
    }

    public String initTestDir() {
        testDir = new File(defaultTestFolder);
        if (testDir.exists())
            cleanTestDir();
        if (!testDir.mkdirs())
            throw new RuntimeException(String.format("Failed to create testdir %s", testDir.getAbsolutePath()));
        fullPath = testDir.getAbsolutePath();
        return fullPath;
    }

    public void cleanTestDir() {
        try {
            FileUtils.deleteDirectory(testDir);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public JavaSparkContext getSparkContext() {
        if (jscSingleton == null) {
            SparkConf sparkConf = new SparkConf().setAppName(appName);
            sparkConf.setMaster(CLUSTER_CONF_LOCAL_4);
            jscSingleton = new JavaSparkContext(sparkConf);
        }
        return jscSingleton;
    }

    public SparkJni getSparkJni(String classpath) {
        return new SparkJniBuilder().appName(appName).nativePath(fullPath).classpath(classpath).build();
    }

    public String getLibPath() {
        return JniUtils.generateDefaultLibPath(appName, fullPath);
    }

    public SparkJni getSparkJni() {
        return getSparkJni(JniUtils.getClasspath());
    }
}

19 Source : SparkJni.java
with Apache License 2.0
from tudorv91

public class SparkJni {

    private final MetadataHandler metadataHandler;

    private final DeployTimesLogger deployTimesLogger;

    private final JniLinkHandler jniLinkHandler;

    private final MakefileGenerator makefileGenerator;

    private final NativeFunctionPrototypesCollector nativeFunctionPrototypesCollector;

    private static JniRootContainer jniRootContainer;

    private static JavaSparkContext javaSparkContext;

    private HashMap<String, String> functionCodeInjectorMap;

    private DeployMode deployMode;

    private boolean overWriteKernelFile = false;

    @Inject
    private SparkJni(@Nonnull MetadataHandler metadataHandler, @Nonnull DeployTimesLogger deployTimesLogger, @Nonnull Provider<JniLinkHandler> jniLinkHandlerProvider, @Nonnull MakefileGenerator makefileGenerator, @Nonnull NativeFunctionPrototypesCollector nativeFunctionPrototypesCollector) {
        this.metadataHandler = metadataHandler;
        this.deployTimesLogger = deployTimesLogger;
        this.jniLinkHandler = jniLinkHandlerProvider.get();
        this.makefileGenerator = makefileGenerator;
        this.nativeFunctionPrototypesCollector = nativeFunctionPrototypesCollector;
        // by default, follow the entire deploy process
        deployMode = new DeployMode(DeployMode.DeployModes.FULL_GENERATE_AND_BUILD);
    }

    @Builder.Factory
    static SparkJni sparkJni(@Nonnull Optional<String> appName, @Nonnull String nativePath, @Nonnull Optional<String> jdkPath, @Nonnull Optional<String> classpath) {
        final SparkJni sparkJniSingleton = injectSparkJni();
        sparkJniSingleton.initVars(appName.isPresent() ? appName.get() : null, nativePath, jdkPath.isPresent() ? jdkPath.get() : null);
        classpath.transform(new Function<String, Object>() {

            @Nullable
            @Override
            public Object apply(@Nullable String s) {
                sparkJniSingleton.addToClasspath(s);
                return new Object();
            }
        });
        return sparkJniSingleton;
    }

    private void initVars(String appName, String nativePath, String jdkPath) {
        setAppName(appName);
        setNativePath(nativePath);
        setJdkPath(jdkPath);
    }

    public void deploy() {
        deployTimesLogger.start = System.currentTimeMillis();
        processCppContent();
        loadNativeLib();
    }

    public void deployWithCodeInjections(HashMap<String, String> functionCodeInjectorMap) {
        if (!functionCodeInjectorMap.isEmpty()) {
            this.functionCodeInjectorMap = functionCodeInjectorMap;
        }
        deploy();
    }

    private void loadNativeLib() {
        String libraryFullPath = JniUtils.generateDefaultLibPath(metadataHandler.getAppName(), metadataHandler.getNativePath());
        if (javaSparkContext != null) {
            javaSparkContext.addFile(libraryFullPath);
        } else {
            System.load(libraryFullPath);
        }
        deployTimesLogger.libLoadTime = System.currentTimeMillis() - deployTimesLogger.start;
    }

    private void processCppContent() {
        checkNativePath();
        jniLinkHandler.deployLink();
        executeAndBenchmarkJavah();
        generateAndCheckMakefile();
        generateJniRootContainer();
        generateKernelFiles();
        build();
    }

    private void executeAndBenchmarkJavah() {
        long startJavah = System.currentTimeMillis();
        if (deployMode.doJavah) {
            jniLinkHandler.javah(metadataHandler.getClasspath());
        }
        nativeFunctionPrototypesCollector.collectNativeFunctionPrototypes();
        deployTimesLogger.javahTime = System.currentTimeMillis() - startJavah;
    }

    public void addToClasspath(String... classpath) {
        for (String cPath : classpath) {
            metadataHandler.addToClasspath(cPath);
        }
    }

    private void generateJniRootContainer() {
        jniRootContainer = ImmutableJniRootContainerProvider.builder().build().buildJniRootContainer(metadataHandler.getNativePath(), metadataHandler.getAppName());
    }

    private void generateKernelFiles() {
        KernelFileWrapperHeader kernelFileWrapperHeader = getKernelFileWrapperHeader();
        if (!deployMode.doForceOverwriteKernelWrappers) {
            return;
        }
        if (!kernelFileWrapperHeader.writeKernelWrapperFile()) {
            throw new HardSparkJniException(Messages.ERR_KERNEL_FILE_GENERATION_FAILED);
        }
        if (deployMode.doForceOverwriteKernelWrappers) {
            KernelFile kernelFile = kernelFileWrapperHeader.getKernelFile();
            if (functionCodeInjectorMap != null && !functionCodeInjectorMap.isEmpty()) {
                injectFunctionCodeBody(kernelFile.userNativeFunctions());
            }
            kernelFile.writeKernelFile(overWriteKernelFile);
        }
    }

    private void injectFunctionCodeBody(List<UserNativeFunction> userNativeFunctions) {
        for (UserNativeFunction userNativeFunction : userNativeFunctions) {
            String functionName = userNativeFunction.functionSignatureMapper().functionNameMapper().cppName();
            String codeBody = functionCodeInjectorMap.get(functionName);
            if (codeBody == null)
                continue;
            userNativeFunction.setFunctionBodyCodeInsertion(Optional.of(codeBody));
        }
    }

    private void generateAndCheckMakefile() {
        if (deployMode.doGenerateMakefile)
            if (!makefileGenerator.generateMakefile(deployMode)) {
                System.err.println(Messages.MAKEFILE_GENERATION_FAILED_ERROR);
                System.exit(3);
            }
    }

    private void build() {
        String nativePath = metadataHandler.getNativePath();
        deployTimesLogger.genTime = System.currentTimeMillis() - deployTimesLogger.start - deployTimesLogger.javahTime;
        deployTimesLogger.start = System.currentTimeMillis();
        if (deployMode.doBuild) {
            JniUtils.runProcess(String.format(CppSyntax.EXEC_MAKE_CLEAN, nativePath));
            JniUtils.runProcess(String.format(CppSyntax.EXEC_MAKE, nativePath));
        }
        deployTimesLogger.buildTime = System.currentTimeMillis() - deployTimesLogger.start;
    }

    private void checkNativePath() {
        if (metadataHandler.getNativePath() == null) {
            System.err.println(Messages.NATIVE_PATH_NOT_SET);
            System.exit(1);
        }
        File nativePathDir = new File(metadataHandler.getNativePath());
        if (!nativePathDir.exists() || !nativePathDir.isDirectory()) {
            System.err.println(Messages.NATIVE_PATH_ERROR + ":" + nativePathDir.getAbsolutePath());
            System.exit(2);
        }
    }

    public void registerClassifier(SparkJniClassifier sparkJniClassifier) {
        for (Class functionClass : sparkJniClassifier.getJniFunctionClasses()) {
            registerJniFunction(functionClass);
        }
        for (Class beanClass : sparkJniClassifier.getBeanClasses()) {
            registerContainer(beanClass);
        }
    }

    /**
     * Set the user-defined pragma flags for the build stage.
     *
     * @param userDefines
     */
    @SuppressWarnings("unused")
    public SparkJni setUserDefines(String userDefines) {
        metadataHandler.setUserDefines(userDefines);
        return this;
    }

    /**
     * Set the user-specified library directories.
     *
     * @param userLibraryDirs
     */
    @SuppressWarnings("unused")
    public SparkJni setUserLibraryDirs(String userLibraryDirs) {
        metadataHandler.setUserLibraryDirs(userLibraryDirs);
        return this;
    }

    public SparkJni setSparkContext(JavaSparkContext javaSparkContext) {
        SparkJni.javaSparkContext = javaSparkContext;
        return this;
    }

    /**
     * Set the user-specified include directories.
     *
     * @param userIncludeDirs
     */
    public SparkJni setUserIncludeDirs(String userIncludeDirs) {
        metadataHandler.setUserIncludeDirs(userIncludeDirs);
        return this;
    }

    @SuppressWarnings("unused")
    public SparkJni setUserLibraries(String userLibraries) {
        metadataHandler.setUserLibraries(userLibraries);
        return this;
    }

    public SparkJni setJdkPath(String jdkPath) {
        metadataHandler.setJdkPath(jdkPath);
        return this;
    }

    private SparkJni setNativePath(String nativePath) {
        metadataHandler.setNativePath(nativePath);
        return this;
    }

    private SparkJni setAppName(String appName) {
        metadataHandler.setAppName(appName);
        return this;
    }

    /**
     * Register the user-defined jni function.
     *
     * @param jniFunctionClass
     */
    public SparkJni registerJniFunction(Class jniFunctionClass) {
        jniLinkHandler.registerJniFunction(jniFunctionClass);
        return this;
    }

    /**
     * Register the user-defined JavaBean container.
     *
     * @param beanClass
     */
    public SparkJni registerContainer(Class beanClass) {
        jniLinkHandler.registerBean(beanClass);
        return this;
    }

    public JniLinkHandler getJniHandler() {
        return jniLinkHandler;
    }

    private KernelFileWrapperHeader getKernelFileWrapperHeader() {
        return new KernelFileWrapperHeader(jniLinkHandler.getContainerHeaderFiles(), jniRootContainer);
    }

    public JniRootContainer getJniRootContainer() {
        return jniRootContainer;
    }

    public SparkJni setDeployMode(DeployMode deployMode) {
        this.deployMode = deployMode;
        return this;
    }

    @SuppressWarnings("unused")
    public DeployTimesLogger getDeployTimesLogger() {
        return deployTimesLogger;
    }

    public DeployMode getDeployMode() {
        return deployMode;
    }

    ClassLoader getClassloader() {
        return metadataHandler.getClassloader();
    }

    public void setClassloader(ClassLoader classloader) {
        metadataHandler.setClassloader(classloader);
    }

    public void setOverwriteKernelFile(boolean overwriteKernelFile) {
        this.overWriteKernelFile = overwriteKernelFile;
    }
}

19 Source : SparkJni.java
with Apache License 2.0
from tudorv91

public SparkJni setSparkContext(JavaSparkContext javaSparkContext) {
    SparkJni.javaSparkContext = javaSparkContext;
    return this;
}

19 Source : TotalSumExample.java
with Apache License 2.0
from SpiRITlab

/**
 * This method performs the total sum operation on a plaintext vector and prints out the result
 * @param jsc spark context which allows the communication with worker nodes
 * @param slices the number of slices (partitions) the input is split into
 */
public static void test_basic_total_sum(JavaSparkContext jsc, int slices) {
    System.out.println("test_basic_total_sum");
    // distribute a local collection (a list in this case) to form an RDD
    JavaRDD<Integer> values_RDD = jsc.parallelize(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100), slices);
    // sum up the values and display
    System.out.println("values_RDD:" + values_RDD.reduce((x, y) -> {
        // we need to load the shared library and init a copy of SparkFHE on the executor
        SparkFHEPlugin.setup();
        return (x + y);
    }));
}
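
The long literal list above can also be generated programmatically; a small plain-Spark sketch without the SparkFHE plugin setup (class and variable names are illustrative):

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

public class TotalSumSketch {

    public static void main(String[] args) {
        JavaSparkContext jsc = new JavaSparkContext(
            new SparkConf().setAppName("totalSum").setMaster("local[*]"));
        int slices = 2;
        // build the 1..100 input list instead of spelling it out
        List<Integer> values = IntStream.rangeClosed(1, 100).boxed().collect(Collectors.toList());
        JavaRDD<Integer> valuesRdd = jsc.parallelize(values, slices);
        System.out.println("total sum: " + valuesRdd.reduce(Integer::sum)); // 5050
        jsc.stop();
    }
}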

19 Source : TotalSumExample.java
with Apache License 2.0
from SpiRITlab

/**
 * This method performs the total sum operation on a plaintext vector and prints out the result
 * @param jsc spark context which allows the communication with worker nodes
 * @param slices the number of slices (partitions) the input is split into
 */
public static void test_basic_total_sum(JavaSparkContext jsc, int slices) {
    System.out.println("test_basic_total_sum");
    // distribute a local collection (a list in this case) to form an RDD
    JavaRDD<Integer> values_RDD = jsc.parallelize(Arrays.asList(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), slices);
    // sum up the values and display
    System.out.println("values_RDD:" + values_RDD.reduce((x, y) -> {
        // we need to load the shared library and init a copy of SparkFHE on the executor
        SparkFHEPlugin.setup();
        return (x + y);
    }));
}

19 Source : CustomReportServiceTest.java
with Apache License 2.0
from sbl-sdsc

/**
 * @author Peter Rose
 */
public class CustomReportServiceTest {

    private JavaSparkContext sc;

    @Before
    public void setUp() throws Exception {
        SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(AdvancedQueryTest.class.getSimpleName());
        sc = new JavaSparkContext(conf);
    }

    @After
    public void tearDown() throws Exception {
        sc.close();
    }

    @Test
    public void test1() throws IOException {
        Dataset<Row> ds = CustomReportService.getDataset("pmc", "pubmedId", "depositionDate");
        replacedertEquals("StructType(StructField(structureId,StringType,true), StructField(pmc,StringType,true), StructField(pubmedId,IntegerType,true), StructField(depositionDate,TimestampType,true))", ds.schema().toString());
        replacedertTrue(ds.count() > 130101);
    }

    @Test
    public void test2() throws IOException {
        Dataset<Row> ds = CustomReportService.getDataset("ecNo");
        replacedertEquals("StructType(StructField(structureChainId,StringType,true), StructField(structureId,StringType,true), StructField(chainId,StringType,true), StructField(ecNo,StringType,true))", ds.schema().toString());
        replacedertTrue(ds.count() > 130101);
    }
}

19 Source : WildTypeTest.java
with Apache License 2.0
from sbl-sdsc

public class WildTypeTest {

    private static JavaSparkContext sc;

    private static JavaPairRDD<String, StructureDataInterface> pdb;

    @Before
    public void setUp() throws Exception {
        SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(WildTypeTest.class.getSimpleName());
        sc = new JavaSparkContext(conf);
        // 1PEN wildtype query 100 matches: 1PEN:1
        // 1OCZ two entities wildtype query 100 matches: 1OCZ:1, 1OCZ:2
        // 2ONX structure result for author query
        List<String> pdbIds = Arrays.asList("1PEN", "1OCZ", "2ONX");
        pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc);
    }

    @After
    public void tearDown() throws Exception {
        sc.close();
    }

    // TODO the wildtype webservice of RCSB PDB is currently broken
    // @Test
    public void test1() throws IOException {
        pdb = pdb.filter(new WildType(true, 100));
        List<String> results = pdb.keys().collect();
        replacedertTrue(results.contains("1PEN"));
        replacedertTrue(results.contains("1OCZ"));
        replacedertFalse(results.contains("2ONX"));
    }
}

19 Source : PiscesTest.java
with Apache License 2.0
from sbl-sdsc

public class PiscesTest {

    private JavaSparkContext sc;

    private JavaPairRDD<String, StructureDataInterface> pdb;

    @Before
    public void setUp() throws Exception {
        SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(PolymerCompositionTest.class.getSimpleName());
        sc = new JavaSparkContext(conf);
        // "4R4X.A" and "5X42.B" should pass the filter
        List<String> pdbIds = Arrays.asList("5X42", "4R4X", "2ONX", "1JLP");
        pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc);
    }

    @After
    public void tearDown() throws Exception {
        sc.close();
    }

    @Test
    public void test1() throws IOException {
        pdb = pdb.filter(new Pisces(20, 2.0));
        List<String> results = pdb.keys().collect();
        replacedertTrue(results.contains("5X42"));
        replacedertTrue(results.contains("4R4X"));
        replacedertFalse(results.contains("2ONX"));
        replacedertFalse(results.contains("1JLP"));
    }

    @Test
    public void test2() throws IOException {
        pdb = pdb.flatMapToPair(new StructureToPolymerChains());
        pdb = pdb.filter(new Pisces(20, 2.0));
        List<String> results = pdb.keys().collect();
        replacedertTrue(results.contains("5X42.B"));
        replacedertTrue(results.contains("4R4X.A"));
        replacedertFalse(results.contains("5X42.A"));
        replacedertFalse(results.contains("2ONX.A"));
        replacedertFalse(results.contains("1JLP.A"));
    }
}

19 Source : PdbjMineSearchTest.java
with Apache License 2.0
from sbl-sdsc

public class PdbjMineSearchTest {

    private JavaSparkContext sc;

    private JavaPairRDD<String, StructureDataInterface> pdb;

    @Before
    public void setUp() throws Exception {
        SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(PdbjMineSearchTest.class.getSimpleName());
        sc = new JavaSparkContext(conf);
        List<String> pdbIds = Arrays.asList("1FIN", "5JDE", "5CU4", "5L6W", "5UFU", "5IHB");
        pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc);
    }

    @After
    public void tearDown() throws Exception {
        sc.close();
    }

    @Test
    public /**
     * This test runs a SQL query and compares the results at the PDB entry
     * level
     *
     * @throws IOException
     */
    void test1() throws IOException {
        String sql = "SELECT pdbid, chain FROM sifts.pdb_chain_enzyme WHERE ec_number = '2.7.11.1'";
        pdb = pdb.filter(new PdbjMineSearch(sql));
        List<String> matches = pdb.keys().collect();
        replacedertTrue(matches.contains("5JDE"));
        replacedertTrue(matches.contains("5CU4"));
        replacedertTrue(matches.contains("5L6W"));
        replacedertTrue(matches.contains("5UFU"));
        replacedertFalse(matches.contains("5IHB"));
        replacedertFalse(matches.contains("1FIN"));
    }

    @Test
    public /**
     * This test runs a chain level SQL query and compares chain level results
     *
     * @throws IOException
     */
    void test2() throws IOException {
        String sql = "SELECT e.pdbid, e.chain FROM sifts.pdb_chain_enzyme AS e WHERE e.ec_number = '2.7.11.1'";
        pdb = pdb.flatMapToPair(new StructureToPolymerChains());
        pdb = pdb.filter(new PdbjMineSearch(sql));
        List<String> matches = pdb.keys().collect();
        replacedertTrue(matches.contains("5JDE.A"));
        replacedertTrue(matches.contains("5JDE.B"));
        replacedertTrue(matches.contains("5CU4.A"));
        // this chain is EC 2.7.11.1
        replacedertTrue(matches.contains("5L6W.L"));
        // this chain in not EC 2.7.11.1
        replacedertFalse(matches.contains("5L6W.C"));
        replacedertTrue(matches.contains("5UFU.A"));
        replacedertFalse(matches.contains("5UFU.B"));
        replacedertFalse(matches.contains("5UFU.C"));
        replacedertFalse(matches.contains("5IHB.A"));
        replacedertFalse(matches.contains("5IHB.B"));
        replacedertFalse(matches.contains("5IHB.C"));
        replacedertFalse(matches.contains("5IHB.D"));
    }
}

19 Source : CustomReportQueryTest.java
with Apache License 2.0
from sbl-sdsc

public class CustomReportQueryTest {

    private JavaSparkContext sc;

    private JavaPairRDD<String, StructureDataInterface> pdb;

    @Before
    public void setUp() throws Exception {
        SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(CustomReportQueryTest.class.getSimpleName());
        sc = new JavaSparkContext(conf);
        List<String> pdbIds = Arrays.asList("5JDE", "5CU4", "5L6W", "5UFU", "5IHB");
        pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc);
    }

    @After
    public void tearDown() throws Exception {
        sc.close();
    }

    @Test
    public /**
     * This test runs a chain level query and compares the results at the PDB entry level
     * @throws IOException
     */
    void test1() throws IOException {
        String whereClause = "WHERE ecNo='2.7.11.1' AND source='replaced sapiens'";
        pdb = pdb.filter(new CustomReportQuery(whereClause, "ecNo", "source"));
        List<String> matches = pdb.keys().collect();
        replacedertTrue(matches.contains("5JDE"));
        replacedertTrue(matches.contains("5CU4"));
        replacedertTrue(matches.contains("5L6W"));
        replacedertFalse(matches.contains("5UFU"));
        replacedertFalse(matches.contains("5IHB"));
    }

    @Test
    public /**
     *  This test runs a chain level query and compares chain level results
     * @throws IOException
     */
    void test2() throws IOException {
        pdb = pdb.flatMapToPair(new StructureToPolymerChains());
        String whereClause = "WHERE ecNo='2.7.11.1' AND source='replaced sapiens'";
        pdb = pdb.filter(new CustomReportQuery(whereClause, "ecNo", "source"));
        List<String> matches = pdb.keys().collect();
        replacedertTrue(matches.contains("5JDE.A"));
        replacedertTrue(matches.contains("5JDE.B"));
        replacedertTrue(matches.contains("5CU4.A"));
        // this chain is EC 2.7.11.1
        replacedertTrue(matches.contains("5L6W.L"));
        // this chain in not EC 2.7.11.1
        replacedertFalse(matches.contains("5L6W.C"));
        replacedertFalse(matches.contains("5UFU.A"));
        replacedertFalse(matches.contains("5UFU.B"));
        replacedertFalse(matches.contains("5UFU.C"));
        replacedertFalse(matches.contains("5IHB.A"));
        replacedertFalse(matches.contains("5IHB.B"));
        replacedertFalse(matches.contains("5IHB.C"));
        replacedertFalse(matches.contains("5IHB.D"));
    }
}

19 Source : ChemicalStructureQueryTest.java
with Apache License 2.0
from sbl-sdsc

public class ChemicalStructureQueryTest {

    private JavaSparkContext sc;

    private JavaPairRDD<String, StructureDataInterface> pdb;

    @Before
    public void setUp() throws Exception {
        SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(ChemicalStructureQueryTest.class.getSimpleName());
        sc = new JavaSparkContext(conf);
        List<String> pdbIds = Arrays.asList("1HYA", "2ONX", "1F27", "4QMC", "2RTL");
        pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc);
    }

    @After
    public void tearDown() throws Exception {
        sc.close();
    }

    @Test
    public void test1() throws IOException {
        pdb = pdb.filter(new ChemicalStructureQuery("CC(=O)NC1C(O)OC(CO)C(O)C1O"));
        List<String> results = pdb.keys().collect();
        replacedertTrue(results.contains("1HYA"));
        replacedertFalse(results.contains("2ONX"));
    }

    @Test
    public void test2() throws IOException {
        pdb = pdb.filter(new ChemicalStructureQuery("OC(=O)CCCC[C@@H]1SC[C@@H]2NC(=O)N[C@H]12", ChemicalStructureQuery.EXACT, 0));
        List<String> results = pdb.keys().collect();
        replacedertFalse(results.contains("1HYA"));
        replacedertFalse(results.contains("2ONX"));
        replacedertTrue(results.contains("1F27"));
        replacedertFalse(results.contains("2RTL"));
        replacedertFalse(results.contains("4QMC"));
    }

    @Test
    public void test3() throws IOException {
        pdb = pdb.filter(new ChemicalStructureQuery("OC(=O)CCCC[C@@H]1SC[C@@H]2NC(=O)N[C@H]12", ChemicalStructureQuery.SUBSTRUCTURE, 0));
        List<String> results = pdb.keys().collect();
        replacedertFalse(results.contains("1HYA"));
        replacedertFalse(results.contains("2ONX"));
        replacedertTrue(results.contains("1F27"));
        replacedertFalse(results.contains("2RTL"));
        replacedertTrue(results.contains("4QMC"));
    }

    @Test
    public void test4() throws IOException {
        pdb = pdb.filter(new ChemicalStructureQuery("OC(=O)CCCC[C@@H]1SC[C@@H]2NC(=O)N[C@H]12", ChemicalStructureQuery.SIMILAR, 70));
        List<String> results = pdb.keys().collect();
        replacedertFalse(results.contains("1HYA"));
        replacedertFalse(results.contains("2ONX"));
        replacedertTrue(results.contains("1F27"));
        replacedertTrue(results.contains("2RTL"));
        replacedertTrue(results.contains("4QMC"));
    }

    @Test
    public void test5() throws IOException {
        pdb = pdb.filter(new ChemicalStructureQuery("OC(=O)CCCC[C@H]1[C@H]2NC(=O)N[C@H]2C[S@@]1=O", ChemicalStructureQuery.SUPERSTRUCTURE, 0));
        List<String> results = pdb.keys().collect();
        replacedertFalse(results.contains("1HYA"));
        replacedertFalse(results.contains("2ONX"));
        replacedertTrue(results.contains("1F27"));
        replacedertFalse(results.contains("2RTL"));
        replacedertTrue(results.contains("4QMC"));
    }
}

19 Source : BlastClustersTest.java
with Apache License 2.0
from sbl-sdsc

public class BlastClustersTest {

    private JavaSparkContext sc;

    private JavaPairRDD<String, StructureDataInterface> pdb;

    @Before
    public void setUp() throws Exception {
        SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(BlastClustersTest.class.getSimpleName());
        sc = new JavaSparkContext(conf);
        List<String> pdbIds = Arrays.asList("1O06", "2ONX");
        pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc);
    }

    @After
    public void tearDown() throws Exception {
        sc.close();
    }

    @Test
    public /**
     * This test runs a pdb level query and compares the results at the PDB entry level
     * @throws IOException
     */
    void test1() throws IOException, StructureException {
        pdb = pdb.filter(new BlastClusters(40));
        List<String> matches = pdb.keys().collect();
        replacedertTrue(matches.contains("1O06"));
        replacedertFalse(matches.contains("1O06.A"));
        replacedertFalse(matches.contains("2ONX"));
    }

    @Test
    public /**
     * This test runs a chain level query and compares the results at the PDB entry level
     * @throws IOException
     */
    void test2() throws IOException, StructureException {
        pdb = pdb.filter(new BlastClusters(40));
        pdb = pdb.flatMapToPair(new StructureToPolymerChains());
        List<String> matches = pdb.keys().collect();
        replacedertFalse(matches.contains("1O06"));
        replacedertTrue(matches.contains("1O06.A"));
        replacedertFalse(matches.contains("2ONX.A"));
    }
}

19 Source : AdvancedQueryTest.java
with Apache License 2.0
from sbl-sdsc

public class AdvancedQueryTest {

    private static JavaSparkContext sc;

    private static JavaPairRDD<String, StructureDataInterface> pdb;

    @Before
    public void setUp() throws Exception {
        SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(AdvancedQueryTest.class.getSimpleName());
        sc = new JavaSparkContext(conf);
        // 1PEN wildtype query 100 matches: 1PEN:1
        // 1OCZ two entities wildtype query 100 matches: 1OCZ:1, 1OCZ:2
        // 2ONX structure result for author query
        // 5L6W two chains: chain L is EC 2.7.11.1, chain C is not EC 2.7.11.1
        // 5KHU many chains, chain Q is EC 2.7.11.1
        // 1F3M entity 1: chains A,B, entity 2: chains B,C, all chains are EC 2.7.11.1
        List<String> pdbIds = Arrays.asList("1PEN", "1OCZ", "2ONX", "5L6W", "5KHU", "1F3M");
        pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc);
    }

    @After
    public void tearDown() throws Exception {
        sc.close();
    }

    // TODO the wildtype query web service is currently broken
    // @Test
    /**
     * This test runs a chain level query and compares the results at the PDB entry level
     * @throws IOException
     */
    public void test1() throws IOException {
        String query = "<orgPdbQuery>" + "<queryType>org.pdb.query.simple.WildTypeProteinQuery</queryType>" + "<includeExprTag>Y</includeExprTag>" + "<percentSeqAlignment>100</percentSeqAlignment>" + "</orgPdbQuery>";
        pdb = pdb.filter(new AdvancedQuery(query));
        List<String> matches = pdb.keys().collect();
        replacedertTrue(matches.contains("1PEN"));
        replacedertTrue(matches.contains("1OCZ"));
        replacedertFalse(matches.contains("2ONX"));
        replacedertFalse(matches.contains("5L6W"));
    }

    @Test
    public /**
     * This test runs a chain level query and compares the results at the PDB entry level
     * @throws IOException
     */
    void test2() throws IOException {
        String query = "<orgPdbQuery>" + "<queryType>org.pdb.query.simple.AdvancedAuthorQuery</queryType>" + "<searchType>All Authors</searchType><audit_author.name>Eisenberg</audit_author.name>" + "<exactMatch>false</exactMatch>" + "</orgPdbQuery>";
        pdb = pdb.filter(new AdvancedQuery(query));
        List<String> matches = pdb.keys().collect();
        replacedertFalse(matches.contains("1PEN"));
        replacedertFalse(matches.contains("1OCZ"));
        replacedertTrue(matches.contains("2ONX"));
        replacedertFalse(matches.contains("5L6W"));
    }

    @Test
    public /**
     * This test runs a chain level query and compares the results at the PDB entry level
     * @throws IOException
     */
    void test3() throws IOException {
        String query = "<orgPdbQuery>" + "<queryType>org.pdb.query.simple.EnzymeClreplacedificationQuery</queryType>" + "<Enzyme_Clreplacedification>2.7.11.1</Enzyme_Clreplacedification>" + "</orgPdbQuery>";
        pdb = pdb.filter(new AdvancedQuery(query));
        List<String> matches = pdb.keys().collect();
        replacedertFalse(matches.contains("1PEN"));
        replacedertFalse(matches.contains("1OCZ"));
        replacedertFalse(matches.contains("2ONX"));
        replacedertTrue(matches.contains("5L6W"));
        replacedertTrue(matches.contains("5KHU"));
    }

    @Test
    public /**
     * This test runs a chain level query and compares the results at the PDB chain level
     * @throws IOException
     */
    void test4() throws IOException {
        String query = "<orgPdbQuery>" + "<queryType>org.pdb.query.simple.EnzymeClreplacedificationQuery</queryType>" + "<Enzyme_Clreplacedification>2.7.11.1</Enzyme_Clreplacedification>" + "</orgPdbQuery>";
        pdb = pdb.flatMapToPair(new StructureToPolymerChains());
        pdb = pdb.filter(new AdvancedQuery(query));
        List<String> matches = pdb.keys().collect();
        replacedertFalse(matches.contains("1PEN.A"));
        replacedertFalse(matches.contains("1OCZ.A"));
        replacedertFalse(matches.contains("2ONX.A"));
        // only this chain is EC 2.7.11.1
        replacedertTrue(matches.contains("5L6W.L"));
        replacedertFalse(matches.contains("5L6W.C"));
        replacedertFalse(matches.contains("5KHU.A"));
        replacedertFalse(matches.contains("5KHU.B"));
        // only this chain is EC 2.7.11.1
        replacedertTrue(matches.contains("5KHU.Q"));
        // 1F3M all chains are EC 2.7.11.1
        replacedertTrue(matches.contains("1F3M.A"));
        replacedertTrue(matches.contains("1F3M.B"));
        replacedertTrue(matches.contains("1F3M.C"));
        replacedertTrue(matches.contains("1F3M.D"));
    }
}
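
The chain-level pattern in test4 above (split entries into polymer chains, then filter with an AdvancedQuery) is easy to reuse outside of JUnit. Below is a minimal standalone sketch of that usage; the class name QueryChainsExample and the hard-coded PDB ID list are illustrative assumptions, while JavaSparkContext, MmtfReader, StructureToPolymerChains and AdvancedQuery are the same classes used by the tests above (their mmtf-spark import statements are omitted here, as in the other listings).

import java.io.IOException;
import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.rcsb.mmtf.api.StructureDataInterface;

// Hypothetical standalone example, not part of the test suite above.
// MmtfReader, StructureToPolymerChains and AdvancedQuery are the mmtf-spark
// classes used in AdvancedQueryTest; their imports are omitted as in the listings.
public class QueryChainsExample {

    public static void main(String[] args) throws IOException {
        SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(QueryChainsExample.class.getSimpleName());
        JavaSparkContext sc = new JavaSparkContext(conf);
        // Illustrative entry list; any set of PDB IDs works here.
        List<String> pdbIds = Arrays.asList("5L6W", "5KHU", "1F3M");
        JavaPairRDD<String, StructureDataInterface> pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc);
        // Same EC number query used in test3/test4 above.
        String query = "<orgPdbQuery>" + "<queryType>org.pdb.query.simple.EnzymeClassificationQuery</queryType>" + "<Enzyme_Classification>2.7.11.1</Enzyme_Classification>" + "</orgPdbQuery>";
        // Split entries into polymer chains, then keep only chains matching the query.
        List<String> matches = pdb.flatMapToPair(new StructureToPolymerChains()).filter(new AdvancedQuery(query)).keys().collect();
        for (String key : matches) {
            System.out.println(key);
        }
        sc.close();
    }
}

Using the reduced MMTF files keeps the download small; the same pipeline should also work with downloadFullMmtfFiles, as used in the other tests, when atom-level detail is needed.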

19 Source : CoordinationGeometryTest.java
with Apache License 2.0
from sbl-sdsc

public class CoordinationGeometryTest {

    private JavaSparkContext sc;

    private JavaPairRDD<String, StructureDataInterface> pdb;

    @Before
    public void setUp() throws Exception {
        SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(ColumnarStructureTest.class.getSimpleName());
        sc = new JavaSparkContext(conf);
        List<String> pdbIds = Arrays.asList("5Y20");
        pdb = MmtfReader.downloadFullMmtfFiles(pdbIds, sc);
    }

    @After
    public void tearDown() throws Exception {
        sc.close();
    }

    @Test
    public void test() {
        StructureDataInterface structure = pdb.values().first();
        ColumnarStructure cs = new ColumnarStructure(structure, true);
        // ZN A.101.ZN
        Point3d center = getCoords(cs, 459);
        Point3d[] neighbors = new Point3d[6];
        // CYS A.7.SG
        neighbors[0] = getCoords(cs, 28);
        // CYS A.10.SG
        neighbors[1] = getCoords(cs, 44);
        // HIS A.31.ND1
        neighbors[2] = getCoords(cs, 223);
        // CYS A.34.SG
        neighbors[3] = getCoords(cs, 245);
        // CYS A.10.N
        neighbors[4] = getCoords(cs, 45);
        // HIS A.31.O
        neighbors[5] = getCoords(cs, 220);
        CoordinationGeometry geom = new CoordinationGeometry(center, neighbors);
        double q3Expected = 0.9730115379131878;
        assertEquals(q3Expected, geom.q3(), 0.0001);
        double q4Expected = 0.9691494056145086;
        assertEquals(q4Expected, geom.q4(), 0.0001);
        double q5Expected = 0.5126001729084566;
        assertEquals(q5Expected, geom.q5(), 0.0001);
        double q6Expected = 0.2723305441457363;
        assertEquals(q6Expected, geom.q6(), 0.0001);
    }

    private static Point3d getCoords(ColumnarStructure cs, int index) {
        return new Point3d(cs.getxCoords()[index], cs.getyCoords()[index], cs.getzCoords()[index]);
    }
}
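
CoordinationGeometry itself needs no Spark context: as the test above shows, it takes a center Point3d and an array of neighbor Point3d coordinates. The following sketch feeds it an idealized octahedral site built from synthetic coordinates; the coordinates, the class name and the expectation that q6() comes out close to 1.0 for a perfect octahedron are assumptions for illustration, while the constructor and the q3() through q6() accessors are the ones exercised above (the CoordinationGeometry import is omitted, as in the listings).

import javax.vecmath.Point3d;

// Illustrative, self-contained use of CoordinationGeometry with synthetic coordinates
// (no structure download required).
public class OctahedralGeometryExample {

    public static void main(String[] args) {
        // Idealized octahedron: six neighbors at unit distance along the coordinate axes.
        Point3d center = new Point3d(0, 0, 0);
        Point3d[] neighbors = new Point3d[] {
            new Point3d(1, 0, 0), new Point3d(-1, 0, 0),
            new Point3d(0, 1, 0), new Point3d(0, -1, 0),
            new Point3d(0, 0, 1), new Point3d(0, 0, -1)
        };
        CoordinationGeometry geom = new CoordinationGeometry(center, neighbors);
        // Assumption: q6 should approach 1.0 for this near-ideal octahedral arrangement,
        // for comparison with the zinc site above, where q4 is about 0.97.
        System.out.println("q3 = " + geom.q3());
        System.out.println("q4 = " + geom.q4());
        System.out.println("q5 = " + geom.q5());
        System.out.println("q6 = " + geom.q6());
    }
}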

19 Source : ColumnarStructureXTest.java
with Apache License 2.0
from sbl-sdsc

public class ColumnarStructureXTest {

    private JavaSparkContext sc;

    @Before
    public void setUp() throws Exception {
        SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(ColumnarStructureTest.class.getSimpleName());
        sc = new JavaSparkContext(conf);
    }

    @After
    public void tearDown() throws Exception {
        sc.close();
    }

    @Test
    public void test1() {
        List<String> pdbIds = Arrays.asList("5NVB");
        JavaPairRDD<String, StructureDataInterface> pdb = MmtfReader.downloadFullMmtfFiles(pdbIds, sc);
        StructureDataInterface s = pdb.values().first();
        ColumnarStructureX cs = new ColumnarStructureX(s, true);
        assertEquals(cs.getNormalizedbFactors()[0], Float.MAX_VALUE, 0.000001);
    }

    @Test
    public void test2() {
        List<String> pdbIds = Arrays.asList("4QXX");
        JavaPairRDD<String, StructureDataInterface> pdb = MmtfReader.downloadFullMmtfFiles(pdbIds, sc);
        StructureDataInterface s = pdb.values().first();
        ColumnarStructureX cs = new ColumnarStructureX(s, true);
        assertTrue(cs.isGroupWithAlternateLocations()[6]);
    }
}
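
Both accessors exercised above return plain arrays, so they can be scanned directly. The sketch below scans 4QXX and counts how many entries are flagged by isGroupWithAlternateLocations() and how many normalized B-factor entries carry the Float.MAX_VALUE value seen in test1; the class name and the printed summary are illustrative, while the two accessors are the methods tested above (MmtfReader and ColumnarStructureX imports are omitted, as in the listings).

import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.rcsb.mmtf.api.StructureDataInterface;

// Illustrative standalone scan over ColumnarStructureX arrays
// (MmtfReader and ColumnarStructureX imports omitted, as in the listings above).
public class AlternateLocationCountExample {

    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(AlternateLocationCountExample.class.getSimpleName());
        JavaSparkContext sc = new JavaSparkContext(conf);
        List<String> pdbIds = Arrays.asList("4QXX");
        JavaPairRDD<String, StructureDataInterface> pdb = MmtfReader.downloadFullMmtfFiles(pdbIds, sc);
        StructureDataInterface s = pdb.values().first();
        ColumnarStructureX cs = new ColumnarStructureX(s, true);
        // Count entries flagged as belonging to groups with alternate locations.
        int altLocEntries = 0;
        for (boolean flag : cs.isGroupWithAlternateLocations()) {
            if (flag) {
                altLocEntries++;
            }
        }
        // Count normalized B-factor entries set to Float.MAX_VALUE (the value asserted in test1).
        int maxValueEntries = 0;
        for (float b : cs.getNormalizedbFactors()) {
            if (b == Float.MAX_VALUE) {
                maxValueEntries++;
            }
        }
        System.out.println("Entries flagged with alternate locations: " + altLocEntries);
        System.out.println("Normalized B-factor entries set to Float.MAX_VALUE: " + maxValueEntries);
        sc.close();
    }
}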

19 Source : ColumnarStructureTest.java
with Apache License 2.0
from sbl-sdsc

public class ColumnarStructureTest {

    private JavaSparkContext sc;

    private JavaPairRDD<String, StructureDataInterface> pdb;

    @Before
    public void setUp() throws Exception {
        SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(ColumnarStructureTest.class.getSimpleName());
        sc = new JavaSparkContext(conf);
        List<String> pdbIds = Arrays.asList("1STP");
        pdb = MmtfReader.downloadFullMmtfFiles(pdbIds, sc);
    }

    @After
    public void tearDown() throws Exception {
        sc.close();
    }

    @Test
    public void testGetxCoords() {
        StructureDataInterface s = pdb.values().first();
        ColumnarStructure cs = new ColumnarStructure(s, true);
        assertEquals(26.260, cs.getxCoords()[20], 0.001);
    }

    @Test
    public void testGetElements() {
        StructureDataInterface s = pdb.values().first();
        ColumnarStructure cs = new ColumnarStructure(s, true);
        replacedertEquals("C", cs.getElements()[20]);
    }

    @Test
    public void testGetAtomNames() {
        StructureDataInterface s = pdb.values().first();
        ColumnarStructure cs = new ColumnarStructure(s, true);
        replacedertEquals("CG2", cs.getAtomNames()[900]);
    }

    @Test
    public void testGetGroupNames() {
        StructureDataInterface s = pdb.values().first();
        ColumnarStructure cs = new ColumnarStructure(s, true);
        replacedertEquals("VAL", cs.getGroupNames()[900]);
    }

    @Test
    public void testIsPolymer() {
        StructureDataInterface s = pdb.values().first();
        ColumnarStructure cs = new ColumnarStructure(s, true);
        // chain A
        assertEquals(true, cs.isPolymer()[100]);
        // BTN
        assertEquals(false, cs.isPolymer()[901]);
        // HOH
        assertEquals(false, cs.isPolymer()[917]);
    }

    @Test
    public void testGetGroupNumbers() {
        StructureDataInterface s = pdb.values().first();
        ColumnarStructure cs = new ColumnarStructure(s, true);
        replacedertEquals("130", cs.getGroupNumbers()[877]);
    }

    @Test
    public void testGetChainIds() {
        StructureDataInterface s = pdb.values().first();
        ColumnarStructure cs = new ColumnarStructure(s, true);
        replacedertEquals("A", cs.getChainIds()[100]);
        // BTN
        replacedertEquals("B", cs.getChainIds()[901]);
        // HOH
        replacedertEquals("C", cs.getChainIds()[917]);
    }

    @Test
    public void testGetChemCompTypes() {
        StructureDataInterface s = pdb.values().first();
        ColumnarStructure cs = new ColumnarStructure(s, true);
        replacedertEquals("PEPTIDE LINKING", cs.getChemCompTypes()[100]);
        // BTN
        replacedertEquals("NON-POLYMER", cs.getChemCompTypes()[901]);
        // HOH
        replacedertEquals("NON-POLYMER", cs.getChemCompTypes()[917]);
    }

    @Test
    public void testGetEntityTypes() {
        StructureDataInterface s = pdb.values().first();
        ColumnarStructure cs = new ColumnarStructure(s, true);
        assertEquals("PRO", cs.getEntityTypes()[100]);
        // BTN
        assertEquals("LGO", cs.getEntityTypes()[901]);
        // HOH
        assertEquals("WAT", cs.getEntityTypes()[917]);
    }

    @Test
    public void testGetChainEntityTypes() {
        StructureDataInterface s = pdb.values().first();
        ColumnarStructure cs = new ColumnarStructure(s, true);
        String[] entityTypes = cs.getChainEntityTypes();
        assertEquals("PRO", entityTypes[0]);
        // BTN
        assertEquals("LGO", entityTypes[1]);
        // HOH
        assertEquals("WAT", entityTypes[2]);
    }

    @Test
    public void testGroupToAtomIndices() {
        StructureDataInterface s = pdb.values().first();
        ColumnarStructure cs = new ColumnarStructure(s, true);
        int[] groupToAtomIndices = cs.getGroupToAtomIndices();
        // ALA-13
        assertEquals(0, groupToAtomIndices[0]);
        // GLU-14
        assertEquals(5, groupToAtomIndices[1]);
        assertEquals(14, groupToAtomIndices[2]);
        // last HOH
        assertEquals(1000, groupToAtomIndices[205]);
        // end
        assertEquals(1001, groupToAtomIndices[206]);
    }

    @Test
    public void testChainToAtomIndices() {
        StructureDataInterface s = pdb.values().first();
        ColumnarStructure cs = new ColumnarStructure(s, true);
        int[] chainToAtomIndices = cs.getChainToAtomIndices();
        // chain A
        assertEquals(0, chainToAtomIndices[0]);
        // BTN
        assertEquals(901, chainToAtomIndices[1]);
        // HOH
        assertEquals(917, chainToAtomIndices[2]);
        // end
        assertEquals(1001, chainToAtomIndices[3]);
    }

    @Test
    public void testChainToGroupIndices() {
        StructureDataInterface s = pdb.values().first();
        ColumnarStructure cs = new ColumnarStructure(s, true);
        int[] chainToGroupIndices = cs.getChainToGroupIndices();
        // chain A
        assertEquals(0, chainToGroupIndices[0]);
        // BTN
        assertEquals(121, chainToGroupIndices[1]);
        // HOH
        assertEquals(122, chainToGroupIndices[2]);
        // end
        assertEquals(206, chainToGroupIndices[3]);
    }
}
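
The assertions above suggest that the index arrays (getGroupToAtomIndices, getChainToAtomIndices, getChainToGroupIndices) behave like offset tables: entry i holds the first atom (or group) index of item i, and one extra trailing entry marks the end, which is what the 206 and 1001 values for 1STP show. The sketch below uses those offsets to list every group of 1STP with its atom count; the class name and printed format are illustrative, while the accessors are the ones exercised above (MmtfReader and ColumnarStructure imports are omitted, as in the listings).

import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.rcsb.mmtf.api.StructureDataInterface;

// Illustrative walk over the group offset table of ColumnarStructure
// (MmtfReader and ColumnarStructure imports omitted, as in the listings above).
public class ListGroupsExample {

    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(ListGroupsExample.class.getSimpleName());
        JavaSparkContext sc = new JavaSparkContext(conf);
        List<String> pdbIds = Arrays.asList("1STP");
        JavaPairRDD<String, StructureDataInterface> pdb = MmtfReader.downloadFullMmtfFiles(pdbIds, sc);
        StructureDataInterface s = pdb.values().first();
        ColumnarStructure cs = new ColumnarStructure(s, true);
        int[] groupToAtomIndices = cs.getGroupToAtomIndices();
        String[] chainIds = cs.getChainIds();
        String[] groupNames = cs.getGroupNames();
        String[] groupNumbers = cs.getGroupNumbers();
        // One entry per group plus a trailing end marker, so the group count is length - 1.
        int numGroups = groupToAtomIndices.length - 1;
        for (int g = 0; g < numGroups; g++) {
            int firstAtom = groupToAtomIndices[g];
            int atomCount = groupToAtomIndices[g + 1] - firstAtom;
            // The per-atom arrays are indexed by atom, so the group's first atom identifies it.
            System.out.println(chainIds[firstAtom] + " " + groupNames[firstAtom] + " " + groupNumbers[firstAtom] + " (" + atomCount + " atoms)");
        }
        sc.close();
    }
}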
