Here are examples of the Java API class org.apache.lucene.document.Field, taken from open-source projects.
1. TestSimilarityProvider#setUp()
Project: lucene-solr
File: TestSimilarityProvider.java
@Override public void setUp() throws Exception { super.setUp(); directory = newDirectory(); PerFieldSimilarityWrapper sim = new ExampleSimilarityProvider(); IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random())).setSimilarity(sim); RandomIndexWriter iw = new RandomIndexWriter(random(), directory, iwc); Document doc = new Document(); Field field = newTextField("foo", "", Field.Store.NO); doc.add(field); Field field2 = newTextField("bar", "", Field.Store.NO); doc.add(field2); field.setStringValue("quick brown fox"); field2.setStringValue("quick brown fox"); iw.addDocument(doc); field.setStringValue("jumps over lazy brown dog"); field2.setStringValue("jumps over lazy brown dog"); iw.addDocument(doc); reader = iw.getReader(); iw.close(); searcher = newSearcher(reader); searcher.setSimilarity(sim); }
2. TestAutomatonQuery#setUp()
Project: lucene-solr
File: TestAutomatonQuery.java
@Override public void setUp() throws Exception { super.setUp(); directory = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), directory); Document doc = new Document(); Field titleField = newTextField("title", "some title", Field.Store.NO); Field field = newTextField(FN, "this is document one 2345", Field.Store.NO); Field footerField = newTextField("footer", "a footer", Field.Store.NO); doc.add(titleField); doc.add(field); doc.add(footerField); writer.addDocument(doc); field.setStringValue("some text from doc two a short piece 5678.91"); writer.addDocument(doc); field.setStringValue("doc three has some different stuff" + " with numbers 1234 5678.9 and letter b"); writer.addDocument(doc); reader = writer.getReader(); searcher = newSearcher(reader); writer.close(); }
3. TestSpanMultiTermQueryWrapper#setUp()
Project: lucene-solr
File: TestSpanMultiTermQueryWrapper.java
@Override public void setUp() throws Exception { super.setUp(); directory = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), directory); Document doc = new Document(); Field field = newTextField("field", "", Field.Store.NO); doc.add(field); field.setStringValue("quick brown fox"); iw.addDocument(doc); field.setStringValue("jumps over lazy broun dog"); iw.addDocument(doc); field.setStringValue("jumps over extremely very lazy broxn dog"); iw.addDocument(doc); reader = iw.getReader(); iw.close(); searcher = newSearcher(reader); }
4. TestIndexWriterReader#testForceMergeDeletes()
Project: lucene-solr
File: TestIndexWriterReader.java
public void testForceMergeDeletes() throws Throwable { Directory dir = newDirectory(); final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy())); Document doc = new Document(); doc.add(newTextField("field", "a b c", Field.Store.NO)); Field id = newStringField("id", "", Field.Store.NO); doc.add(id); id.setStringValue("0"); w.addDocument(doc); id.setStringValue("1"); w.addDocument(doc); w.deleteDocuments(new Term("id", "0")); IndexReader r = w.getReader(); w.forceMergeDeletes(); w.close(); r.close(); r = DirectoryReader.open(dir); assertEquals(1, r.numDocs()); assertFalse(r.hasDeletions()); r.close(); dir.close(); }
5. TestDateSort#createDocument()
Project: lucene-solr
File: TestDateSort.java
private Document createDocument(String text, long time) { Document document = new Document(); // Add the text field. Field textField = newTextField(TEXT_FIELD, text, Field.Store.YES); document.add(textField); // Add the date/time field. String dateTimeString = DateTools.timeToString(time, DateTools.Resolution.SECOND); Field dateTimeField = newStringField(DATE_TIME_FIELD, dateTimeString, Field.Store.YES); document.add(dateTimeField); document.add(new SortedDocValuesField(DATE_TIME_FIELD, new BytesRef(dateTimeString))); return document; }
6. TestDocValuesIndexing#testExcIndexingDocBeforeDocValues()
Project: lucene-solr
File: TestDocValuesIndexing.java
// LUCENE-6049 public void testExcIndexingDocBeforeDocValues() throws Exception { Directory dir = newDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); IndexWriter w = new IndexWriter(dir, iwc); Document doc = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.setDocValuesType(DocValuesType.SORTED); ft.freeze(); Field field = new Field("test", "value", ft); field.setTokenStream(new TokenStream() { @Override public boolean incrementToken() { throw new RuntimeException("no"); } }); doc.add(field); expectThrows(RuntimeException.class, () -> { w.addDocument(doc); }); w.addDocument(new Document()); w.close(); dir.close(); }
7. TestPerFieldPostingsFormat2#doTestMixedPostings()
Project: lucene-solr
File: TestPerFieldPostingsFormat2.java
private void doTestMixedPostings(Codec codec) throws Exception { Directory dir = newDirectory(); IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random())); iwc.setCodec(codec); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc); Document doc = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); // turn on vectors for the checkindex cross-check ft.setStoreTermVectors(true); ft.setStoreTermVectorOffsets(true); ft.setStoreTermVectorPositions(true); Field idField = new Field("id", "", ft); Field dateField = new Field("date", "", ft); doc.add(idField); doc.add(dateField); for (int i = 0; i < 100; i++) { idField.setStringValue(Integer.toString(random().nextInt(50))); dateField.setStringValue(Integer.toString(random().nextInt(100))); iw.addDocument(doc); } iw.close(); // checkindex dir.close(); }
8. TestBackwardsCompatibility#addNoProxDoc()
Project: lucene-solr
File: TestBackwardsCompatibility.java
private void addNoProxDoc(IndexWriter writer) throws IOException { Document doc = new Document(); FieldType customType = new FieldType(TextField.TYPE_STORED); customType.setIndexOptions(IndexOptions.DOCS); Field f = new Field("content3", "aaa", customType); doc.add(f); FieldType customType2 = new FieldType(); customType2.setStored(true); customType2.setIndexOptions(IndexOptions.DOCS); f = new Field("content4", "aaa", customType2); doc.add(f); writer.addDocument(doc); }
9. TransUnitVariantClassBridge#set()
Project: zanata-server
File: TransUnitVariantClassBridge.java
@Override public void set(String s, Object value, Document document, LuceneOptions luceneOptions) { TransMemoryUnitVariant variant = (TransMemoryUnitVariant) value; String textToIndex = variant.getPlainTextSegment(); Field field = new Field(IndexFieldLabels.TRANS_UNIT_VARIANT_FIELD + variant.getLanguage(), textToIndex, luceneOptions.getStore(), luceneOptions.getIndex(), luceneOptions.getTermVector()); field.setBoost(luceneOptions.getBoost()); document.add(field); }
10. GroupSearchBridge#set()
Project: zanata-server
File: GroupSearchBridge.java
@Override public void set(String name, Object value, Document document, LuceneOptions luceneOptions) { HProject project = (HProject) value; Field field = new Field(PROJECT_FIELD, project.getSlug(), luceneOptions.getStore(), luceneOptions.getIndex(), luceneOptions.getTermVector()); field.setBoost(luceneOptions.getBoost()); document.add(field); }
11. CmsLuceneDocument#addDateField()
Project: opencms-core
File: CmsLuceneDocument.java
/** * @see org.opencms.search.I_CmsSearchDocument#addDateField(java.lang.String, long, boolean) */ public void addDateField(String name, long date, boolean analyzed) { Field field = new Field(name, DateTools.dateToString(new Date(date), DateTools.Resolution.MILLISECOND), STORED_NOT_ANALYSED_TYPE); field.setBoost(0.0F); add(field); if (analyzed) { field = new Field(name + CmsSearchField.FIELD_DATE_LOOKUP_SUFFIX, getDateTerms(date), NOT_STORED_ANALYSED_TYPE); add(field); } }
12. MetadataBuilder#createDocument()
Project: LIRE
File: MetadataBuilder.java
@Override public Document createDocument(BufferedImage image, String identifier) { Document doc = new Document(); if (identifier != null) { doc.add(new StringField(DocumentBuilder.FIELD_NAME_IDENTIFIER, identifier, Field.Store.YES)); } Field[] fields = getDescriptorFields(identifier); for (Field field : fields) { doc.add(field); } return doc; }
13. NodeIndexer#addParentChildRelation()
Project: jackrabbit
File: NodeIndexer.java
/** * Adds a parent child relation to the given <code>doc</code>. * * @param doc the document. * @param parentId the id of the parent node. * @throws ItemStateException if the parent node cannot be read. * @throws RepositoryException if the parent node does not have a child node * entry for the current node. */ protected void addParentChildRelation(Document doc, NodeId parentId) throws ItemStateException, RepositoryException { Field parentField = new Field(FieldNames.PARENT, false, parentId.toString(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO); parentField.setIndexOptions(FieldInfo.IndexOptions.DOCS_ONLY); doc.add(parentField); NodeState parent = (NodeState) stateProvider.getItemState(parentId); ChildNodeEntry child = parent.getChildNodeEntry(node.getNodeId()); if (child == null) { // is running in a cluster. throw new RepositoryException("Missing child node entry for node with id: " + node.getNodeId()); } Name name = child.getName(); addNodeName(doc, name.getNamespaceURI(), name.getLocalName()); }
14. RefreshListenersTests#index()
Project: elasticsearch
File: RefreshListenersTests.java
private Engine.Index index(String id, String testFieldValue) { String type = "test"; String uid = type + ":" + id; Document document = new Document(); document.add(new TextField("test", testFieldValue, Field.Store.YES)); Field uidField = new Field("_uid", type + ":" + id, UidFieldMapper.Defaults.FIELD_TYPE); Field versionField = new NumericDocValuesField("_version", Versions.MATCH_ANY); document.add(uidField); document.add(versionField); BytesReference source = new BytesArray(new byte[] { 1 }); ParsedDocument doc = new ParsedDocument(versionField, id, type, null, -1, -1, Arrays.asList(document), source, null); Engine.Index index = new Engine.Index(new Term("_uid", uid), doc); engine.index(index); return index; }
15. BaseTestCheckIndex#testBogusTermVectors()
Project: lucene-solr
File: BaseTestCheckIndex.java
// LUCENE-4221: we have to let these thru, for now public void testBogusTermVectors(Directory dir) throws IOException { IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null)); Document doc = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.setStoreTermVectors(true); ft.setStoreTermVectorOffsets(true); Field field = new Field("foo", "", ft); field.setTokenStream(new CannedTokenStream(new Token("bar", 5, 10), new Token("bar", 1, 4))); doc.add(field); iw.addDocument(doc); iw.close(); }
16. TestWildcardRandom#setUp()
Project: lucene-solr
File: TestWildcardRandom.java
@Override public void setUp() throws Exception { super.setUp(); dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(TestUtil.nextInt(random(), 50, 1000))); Document doc = new Document(); Field field = newStringField("field", "", Field.Store.NO); doc.add(field); NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ROOT)); for (int i = 0; i < 1000; i++) { field.setStringValue(df.format(i)); writer.addDocument(doc); } reader = writer.getReader(); searcher = newSearcher(reader); writer.close(); if (VERBOSE) { System.out.println("TEST: setUp searcher=" + searcher); } }
17. TestRegexpRandom#setUp()
Project: lucene-solr
File: TestRegexpRandom.java
@Override public void setUp() throws Exception { super.setUp(); dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(TestUtil.nextInt(random(), 50, 1000))); Document doc = new Document(); FieldType customType = new FieldType(TextField.TYPE_STORED); customType.setOmitNorms(true); Field field = newField("field", "", customType); doc.add(field); NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ROOT)); for (int i = 0; i < 1000; i++) { field.setStringValue(df.format(i)); writer.addDocument(doc); } reader = writer.getReader(); writer.close(); searcher = newSearcher(reader); }
18. TestPrefixRandom#setUp()
Project: lucene-solr
File: TestPrefixRandom.java
@Override public void setUp() throws Exception { super.setUp(); dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)).setMaxBufferedDocs(TestUtil.nextInt(random(), 50, 1000))); Document doc = new Document(); Field field = newStringField("field", "", Field.Store.NO); doc.add(field); int num = atLeast(1000); for (int i = 0; i < num; i++) { field.setStringValue(TestUtil.randomUnicodeString(random(), 10)); writer.addDocument(doc); } reader = writer.getReader(); searcher = newSearcher(reader); writer.close(); }
19. TestConstantScoreQuery#testPropagatesApproximations()
Project: lucene-solr
File: TestConstantScoreQuery.java
public void testPropagatesApproximations() throws IOException { Directory dir = newDirectory(); RandomIndexWriter w = new RandomIndexWriter(random(), dir); Document doc = new Document(); Field f = newTextField("field", "a b", Field.Store.NO); doc.add(f); w.addDocument(doc); w.commit(); DirectoryReader reader = w.getReader(); final IndexSearcher searcher = newSearcher(reader); // to still have approximations searcher.setQueryCache(null); PhraseQuery pq = new PhraseQuery("field", "a", "b"); ConstantScoreQuery q = new ConstantScoreQuery(pq); final Weight weight = searcher.createNormalizedWeight(q, true); final Scorer scorer = weight.scorer(searcher.getIndexReader().leaves().get(0)); assertNotNull(scorer.twoPhaseIterator()); reader.close(); w.close(); dir.close(); }
20. TestSimilarity2#testOmitTFAndNorms()
Project: lucene-solr
File: TestSimilarity2.java
/** make sure all sims work if TF and norms is omitted */ public void testOmitTFAndNorms() throws Exception { Directory dir = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), dir); Document doc = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.setIndexOptions(IndexOptions.DOCS); ft.setOmitNorms(true); ft.freeze(); Field f = newField("foo", "bar", ft); doc.add(f); iw.addDocument(doc); IndexReader ir = iw.getReader(); iw.close(); IndexSearcher is = newSearcher(ir); for (Similarity sim : sims) { is.setSimilarity(sim); BooleanQuery.Builder query = new BooleanQuery.Builder(); query.add(new TermQuery(new Term("foo", "bar")), BooleanClause.Occur.SHOULD); assertEquals(1, is.search(query.build(), 10).totalHits); } ir.close(); dir.close(); }
21. TestSimilarity2#testOmitTF()
Project: lucene-solr
File: TestSimilarity2.java
/** make sure all sims work if TF is omitted */ public void testOmitTF() throws Exception { Directory dir = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), dir); Document doc = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.setIndexOptions(IndexOptions.DOCS); ft.freeze(); Field f = newField("foo", "bar", ft); doc.add(f); iw.addDocument(doc); IndexReader ir = iw.getReader(); iw.close(); IndexSearcher is = newSearcher(ir); for (Similarity sim : sims) { is.setSimilarity(sim); BooleanQuery.Builder query = new BooleanQuery.Builder(); query.add(new TermQuery(new Term("foo", "bar")), BooleanClause.Occur.SHOULD); assertEquals(1, is.search(query.build(), 10).totalHits); } ir.close(); dir.close(); }
22. TestUniqueTermCount#setUp()
Project: lucene-solr
File: TestUniqueTermCount.java
@Override public void setUp() throws Exception { super.setUp(); dir = newDirectory(); MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true); IndexWriterConfig config = newIndexWriterConfig(analyzer); config.setMergePolicy(newLogMergePolicy()); config.setSimilarity(new TestSimilarity()); RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config); Document doc = new Document(); Field foo = newTextField("foo", "", Field.Store.NO); doc.add(foo); for (int i = 0; i < 100; i++) { foo.setStringValue(addValue()); writer.addDocument(doc); } reader = writer.getReader(); writer.close(); }
23. TestReaderClosed#setUp()
Project: lucene-solr
File: TestReaderClosed.java
@Override public void setUp() throws Exception { super.setUp(); dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)).setMaxBufferedDocs(TestUtil.nextInt(random(), 50, 1000))); Document doc = new Document(); Field field = newStringField("field", "", Field.Store.NO); doc.add(field); // we generate aweful prefixes: good for testing. // but for preflex codec, the test can be very slow, so use less iterations. int num = atLeast(10); for (int i = 0; i < num; i++) { field.setStringValue(TestUtil.randomUnicodeString(random(), 10)); writer.addDocument(doc); } writer.forceMerge(1); reader = writer.getReader(); writer.close(); }
24. TestPostingsOffsets#testLegalbutVeryLargeOffsets()
Project: lucene-solr
File: TestPostingsOffsets.java
public void testLegalbutVeryLargeOffsets() throws Exception { Directory dir = newDirectory(); IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null)); Document doc = new Document(); Token t1 = new Token("foo", 0, Integer.MAX_VALUE - 500); if (random().nextBoolean()) { t1.setPayload(new BytesRef("test")); } Token t2 = new Token("foo", Integer.MAX_VALUE - 500, Integer.MAX_VALUE); TokenStream tokenStream = new CannedTokenStream(new Token[] { t1, t2 }); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); // store some term vectors for the checkindex cross-check ft.setStoreTermVectors(true); ft.setStoreTermVectorPositions(true); ft.setStoreTermVectorOffsets(true); Field field = new Field("foo", tokenStream, ft); doc.add(field); iw.addDocument(doc); iw.close(); dir.close(); }
25. TestOmitTf#testStats()
Project: lucene-solr
File: TestOmitTf.java
/** test that when freqs are omitted, that totalTermFreq and sumTotalTermFreq are -1 */ public void testStats() throws Exception { Directory dir = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random()))); Document doc = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.setIndexOptions(IndexOptions.DOCS); ft.freeze(); Field f = newField("foo", "bar", ft); doc.add(f); iw.addDocument(doc); IndexReader ir = iw.getReader(); iw.close(); assertEquals(-1, ir.totalTermFreq(new Term("foo", new BytesRef("bar")))); assertEquals(-1, ir.getSumTotalTermFreq("foo")); ir.close(); dir.close(); }
26. TestOmitPositions#testBasic()
Project: lucene-solr
File: TestOmitPositions.java
public void testBasic() throws Exception { Directory dir = newDirectory(); RandomIndexWriter w = new RandomIndexWriter(random(), dir); Document doc = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS); Field f = newField("foo", "this is a test test", ft); doc.add(f); for (int i = 0; i < 100; i++) { w.addDocument(doc); } IndexReader reader = w.getReader(); w.close(); assertNotNull(MultiFields.getTermPositionsEnum(reader, "foo", new BytesRef("test"))); PostingsEnum de = TestUtil.docs(random(), reader, "foo", new BytesRef("test"), null, PostingsEnum.FREQS); while (de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { assertEquals(2, de.freq()); } reader.close(); dir.close(); }
27. TestMaxTermFrequency#setUp()
Project: lucene-solr
File: TestMaxTermFrequency.java
@Override public void setUp() throws Exception { super.setUp(); dir = newDirectory(); IndexWriterConfig config = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true)).setMergePolicy(newLogMergePolicy()); config.setSimilarity(new TestSimilarity()); RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config); Document doc = new Document(); Field foo = newTextField("foo", "", Field.Store.NO); doc.add(foo); for (int i = 0; i < 100; i++) { foo.setStringValue(addValue()); writer.addDocument(doc); } reader = writer.getReader(); writer.close(); }
28. TestIndexWriterExceptions#testNullStoredBytesRefFieldReuse()
Project: lucene-solr
File: TestIndexWriterExceptions.java
/** test a null bytesref value doesn't abort the entire segment */ public void testNullStoredBytesRefFieldReuse() throws Exception { Directory dir = newDirectory(); Analyzer analyzer = new MockAnalyzer(random()); IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(analyzer)); // add good document Document doc = new Document(); Field theField = new StoredField("foo", new BytesRef("hello")); doc.add(theField); iw.addDocument(doc); expectThrows(IllegalArgumentException.class, () -> { // set to null value BytesRef v = null; theField.setBytesValue(v); iw.addDocument(doc); fail("didn't get expected exception"); }); assertNull(iw.getTragicException()); iw.close(); // make sure we see our good doc DirectoryReader r = DirectoryReader.open(dir); assertEquals(1, r.numDocs()); r.close(); dir.close(); }
29. TestIndexWriterExceptions#testNullStoredBytesFieldReuse()
Project: lucene-solr
File: TestIndexWriterExceptions.java
/** test a null byte[] value doesn't abort the entire segment */ public void testNullStoredBytesFieldReuse() throws Exception { Directory dir = newDirectory(); Analyzer analyzer = new MockAnalyzer(random()); IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(analyzer)); // add good document Document doc = new Document(); Field theField = new StoredField("foo", new BytesRef("hello").bytes); doc.add(theField); iw.addDocument(doc); expectThrows(NullPointerException.class, () -> { // set to null value byte v[] = null; theField.setBytesValue(v); iw.addDocument(doc); }); assertNull(iw.getTragicException()); iw.close(); // make sure we see our good doc DirectoryReader r = DirectoryReader.open(dir); assertEquals(1, r.numDocs()); r.close(); dir.close(); }
30. TestIndexWriterExceptions#testNullStoredFieldReuse()
Project: lucene-solr
File: TestIndexWriterExceptions.java
/** test a null string value doesn't abort the entire segment */ public void testNullStoredFieldReuse() throws Exception { Directory dir = newDirectory(); Analyzer analyzer = new MockAnalyzer(random()); IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(analyzer)); // add good document Document doc = new Document(); Field theField = new StoredField("foo", "hello", StoredField.TYPE); doc.add(theField); iw.addDocument(doc); expectThrows(IllegalArgumentException.class, () -> { // set to null value theField.setStringValue(null); iw.addDocument(doc); }); assertNull(iw.getTragicException()); iw.close(); // make sure we see our good doc DirectoryReader r = DirectoryReader.open(dir); assertEquals(1, r.numDocs()); r.close(); dir.close(); }
31. TestDocValuesIndexing#testMultiValuedDocValuesField()
Project: lucene-solr
File: TestDocValuesIndexing.java
public void testMultiValuedDocValuesField() throws Exception { Directory d = newDirectory(); RandomIndexWriter w = new RandomIndexWriter(random(), d); Document doc = new Document(); Field f = new NumericDocValuesField("field", 17); doc.add(f); // add the doc w.addDocument(doc); // Index doc values are single-valued so we should not // be able to add same field more than once: doc.add(f); expectThrows(IllegalArgumentException.class, () -> { w.addDocument(doc); fail("didn't hit expected exception"); }); DirectoryReader r = w.getReader(); w.close(); assertEquals(17, DocValues.getNumeric(getOnlyLeafReader(r), "field").get(0)); r.close(); d.close(); }
32. LuceneSearch#newDocument()
Project: zeppelin
File: LuceneSearch.java
/** * If paragraph is not null, indexes code in the paragraph, otherwise indexes * the notebook name. * * @param id id of the document, different for Note name and paragraph * @param noteName name of the note * @param p paragraph * @return */ private Document newDocument(String id, String noteName, Paragraph p) { Document doc = new Document(); Field pathField = new StringField(ID_FIELD, id, Field.Store.YES); doc.add(pathField); doc.add(new StringField("title", noteName, Field.Store.YES)); if (null != p) { doc.add(new TextField(SEARCH_FIELD_TEXT, p.getText(), Field.Store.YES)); if (p.getTitle() != null) { doc.add(new TextField(SEARCH_FIELD_TITLE, p.getTitle(), Field.Store.YES)); } Date date = p.getDateStarted() != null ? p.getDateStarted() : p.getDateCreated(); doc.add(new LongField("modified", date.getTime(), Field.Store.NO)); } else { doc.add(new TextField(SEARCH_FIELD_TEXT, noteName, Field.Store.YES)); } return doc; }
33. FieldBuilderTest#testCreateFieldTermVectorOffsetPositions()
Project: uima-addons
File: FieldBuilderTest.java
@Test public void testCreateFieldTermVectorOffsetPositions() throws Exception { fieldDescription.setMerge(true); fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES); fieldDescription.setTermVector(FieldBuilder.FIELD_TERM_VECTOR_WITH_POSITIONS_OFFSETS); tokenStreams.remove(1); TokenStream tokenStream = createMock(TokenStream.class); expect(filterBuilder.filter(isA(TokenStreamMerger.class), isA(Collection.class))).andReturn(tokenStream); replay(filterBuilder); Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription); verify(filterBuilder); Iterator<Field> fieldIterator = fields.iterator(); Field field1 = fieldIterator.next(); assertEquals("field1", field1.name()); assertTrue(field1.isStoreOffsetWithTermVector()); assertTrue(field1.isTermVectorStored()); assertTrue(field1.isStorePositionWithTermVector()); }
34. FieldBuilderTest#testCreateFieldTermVectorPositions()
Project: uima-addons
File: FieldBuilderTest.java
@Test public void testCreateFieldTermVectorPositions() throws Exception { fieldDescription.setMerge(true); fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES); fieldDescription.setTermVector(FieldBuilder.FIELD_TERM_VECTOR_WITH_POSITIONS); tokenStreams.remove(1); TokenStream tokenStream = createMock(TokenStream.class); expect(filterBuilder.filter(isA(TokenStreamMerger.class), isA(Collection.class))).andReturn(tokenStream); replay(filterBuilder); Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription); verify(filterBuilder); Iterator<Field> fieldIterator = fields.iterator(); Field field1 = fieldIterator.next(); assertEquals("field1", field1.name()); assertFalse(field1.isStoreOffsetWithTermVector()); assertTrue(field1.isTermVectorStored()); assertTrue(field1.isStorePositionWithTermVector()); }
35. FieldBuilderTest#testCreateFieldTermVectorOffset()
Project: uima-addons
File: FieldBuilderTest.java
@Test public void testCreateFieldTermVectorOffset() throws Exception { fieldDescription.setMerge(true); fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES); fieldDescription.setTermVector(FieldBuilder.FIELD_TERM_VECTOR_WITH_OFFSETS); tokenStreams.remove(1); TokenStream tokenStream = createMock(TokenStream.class); expect(filterBuilder.filter(isA(TokenStreamMerger.class), isA(Collection.class))).andReturn(tokenStream); replay(filterBuilder); Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription); verify(filterBuilder); Iterator<Field> fieldIterator = fields.iterator(); Field field1 = fieldIterator.next(); assertEquals("field1", field1.name()); assertTrue(field1.isStoreOffsetWithTermVector()); assertTrue(field1.isTermVectorStored()); assertFalse(field1.isStorePositionWithTermVector()); }
36. FieldBuilderTest#testCreateFieldTermVector()
Project: uima-addons
File: FieldBuilderTest.java
@Test public void testCreateFieldTermVector() throws Exception { fieldDescription.setMerge(true); fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES); fieldDescription.setTermVector(FieldBuilder.FIELD_TERM_VECTOR_YES); tokenStreams.remove(1); TokenStream tokenStream = createMock(TokenStream.class); expect(filterBuilder.filter(isA(TokenStreamMerger.class), isA(Collection.class))).andReturn(tokenStream); replay(filterBuilder); Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription); verify(filterBuilder); Iterator<Field> fieldIterator = fields.iterator(); Field field1 = fieldIterator.next(); assertEquals("field1", field1.name()); assertFalse(field1.isStoreOffsetWithTermVector()); assertTrue(field1.isTermVectorStored()); assertFalse(field1.isStorePositionWithTermVector()); }
37. FieldBuilderTest#testCreateFieldNoNormsTF()
Project: uima-addons
File: FieldBuilderTest.java
@Test public void testCreateFieldNoNormsTF() throws Exception { fieldDescription.setMerge(true); fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_NO_NORMS_TF); TokenStream tokenStream = createMock(TokenStream.class); expect(filterBuilder.filter(isA(TokenStreamMerger.class), isA(Collection.class))).andReturn(tokenStream); replay(filterBuilder); Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription); verify(filterBuilder); Iterator<Field> fieldIterator = fields.iterator(); Field field1 = fieldIterator.next(); assertEquals("field1", field1.name()); assertTrue(field1.getOmitTf()); assertTrue(field1.getOmitNorms()); assertTrue(field1.isIndexed()); assertFalse(field1.isStored()); }
38. FieldBuilderTest#testCreateFieldNoTF()
Project: uima-addons
File: FieldBuilderTest.java
@Test public void testCreateFieldNoTF() throws Exception { fieldDescription.setMerge(true); fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_NO_TF); TokenStream tokenStream = createMock(TokenStream.class); expect(filterBuilder.filter(isA(TokenStreamMerger.class), isA(Collection.class))).andReturn(tokenStream); replay(filterBuilder); Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription); verify(filterBuilder); Iterator<Field> fieldIterator = fields.iterator(); Field field1 = fieldIterator.next(); assertEquals("field1", field1.name()); assertTrue(field1.getOmitTf()); assertFalse(field1.getOmitNorms()); assertTrue(field1.isIndexed()); assertFalse(field1.isStored()); }
39. FieldBuilderTest#testCreateFieldNoNorms()
Project: uima-addons
File: FieldBuilderTest.java
@Test public void testCreateFieldNoNorms() throws Exception { fieldDescription.setMerge(true); fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_NO_NORMS); TokenStream tokenStream = createMock(TokenStream.class); expect(filterBuilder.filter(isA(TokenStreamMerger.class), isA(Collection.class))).andReturn(tokenStream); replay(filterBuilder); Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription); verify(filterBuilder); Iterator<Field> fieldIterator = fields.iterator(); Field field1 = fieldIterator.next(); assertEquals("field1", field1.name()); assertTrue(field1.getOmitNorms()); assertTrue(field1.isIndexed()); assertFalse(field1.isStored()); }
40. FieldBuilderTest#testCreateFieldMerged()
Project: uima-addons
File: FieldBuilderTest.java
@Test public void testCreateFieldMerged() throws Exception { fieldDescription.setMerge(true); fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES); TokenStream tokenStream = createMock(TokenStream.class); expect(filterBuilder.filter(isA(TokenStreamMerger.class), isA(Collection.class))).andReturn(tokenStream); replay(filterBuilder); Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription); verify(filterBuilder); Iterator<Field> fieldIterator = fields.iterator(); Field field1 = fieldIterator.next(); assertEquals("field1", field1.name()); assertEquals(tokenStream, field1.tokenStreamValue()); }
41. FieldBuilderTest#testCreateFieldConcatenated()
Project: uima-addons
File: FieldBuilderTest.java
@Test public void testCreateFieldConcatenated() throws Exception { fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES); TokenStream tokenStream = createMock(TokenStream.class); expect(filterBuilder.filter(isA(TokenStreamConcatenator.class), isA(Collection.class))).andReturn(tokenStream); replay(filterBuilder); Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription); verify(filterBuilder); Iterator<Field> fieldIterator = fields.iterator(); Field field1 = fieldIterator.next(); assertEquals("field1", field1.name()); assertEquals(tokenStream, field1.tokenStreamValue()); }
42. MatchingField#getFieldValue()
Project: spacewalk
File: MatchingField.java
/** * * @return value most responsible for this document being a match */ public String getFieldValue() { String fieldName = getFieldName(); Field f = doc.getField(fieldName); if (f == null) { StringBuffer sb = new StringBuffer(); sb.append("[length=" + terms.length + "; "); for (Object o : terms) { sb.append(o + ", "); } sb.append("]"); log.info("Unable to get matchingFieldValue for field : " + fieldName + " with query: " + query + ", and terms = " + sb.toString()); log.info("Document = " + doc); return ""; } String value = f.stringValue(); if (needNumberToolsAdjust.containsKey(fieldName)) { Long temp = NumberTools.stringToLong(value); value = temp.toString(); } return value; }
43. SecureRealTimeGetComponent#getFilteredInternalDocId()
Project: sentry
File: SecureRealTimeGetComponent.java
/** * @param doc SolrDocument to check * @param idField field where the id is stored * @param fieldType type of id field * @param filterQuery Query to filter by * @param searcher SolrIndexSearcher on which to apply the filter query * @returns the internal docid, or -1 if doc is not found or doesn't match filter */ private static int getFilteredInternalDocId(SolrDocument doc, SchemaField idField, FieldType fieldType, Query filterQuery, SolrIndexSearcher searcher) throws IOException { int docid = -1; Field f = (Field) doc.getFieldValue(idField.getName()); String idStr = f.stringValue(); BytesRef idBytes = new BytesRef(); fieldType.readableToIndexed(idStr, idBytes); // get the internal document id long segAndId = searcher.lookupId(idBytes); // if docid is valid, run it through the filter if (segAndId >= 0) { int segid = (int) segAndId; AtomicReaderContext ctx = searcher.getTopReaderContext().leaves().get((int) (segAndId >> 32)); docid = segid + ctx.docBase; Weight weight = filterQuery.createWeight(searcher); Scorer scorer = weight.scorer(ctx, null); if (scorer == null || segid != scorer.advance(segid)) { // filter doesn't match. docid = -1; } } return docid; }
44. SurfDocumentBuilder#createDescriptorFields()
Project: LIRE
File: SurfDocumentBuilder.java
@Override public Field[] createDescriptorFields(BufferedImage image) { Field[] result = null; Surf s = new Surf(image); List<SURFInterestPoint> interestPoints = s.getFreeOrientedInterestPoints(); result = new Field[interestPoints.size()]; int count = 0; for (Iterator<SURFInterestPoint> sipi = interestPoints.iterator(); sipi.hasNext(); ) { SURFInterestPoint sip = sipi.next(); SurfFeature sf = new SurfFeature(sip); result[count] = (new StoredField(DocumentBuilder.FIELD_NAME_SURF, sf.getByteArrayRepresentation())); count++; } return result; }
45. SiftDocumentBuilder#createDescriptorFields()
Project: LIRE
File: SiftDocumentBuilder.java
@Override public Field[] createDescriptorFields(BufferedImage image) { Field[] result = null; try { // extract features from image: List<Feature> features = extractor.computeSiftFeatures(image); result = new Field[features.size()]; int count = 0; // create new document: for (Iterator<Feature> fit = features.iterator(); fit.hasNext(); ) { Feature f = fit.next(); result[count] = new StoredField(DocumentBuilder.FIELD_NAME_SIFT, f.getByteArrayRepresentation()); count++; } } catch (IOException e) { logger.severe(e.getMessage()); } return result; }
46. CvSurfDocumentBuilder#createDescriptorFields()
Project: LIRE
File: CvSurfDocumentBuilder.java
@Override public Field[] createDescriptorFields(BufferedImage image) { CvSurfExtractor s = new CvSurfExtractor(); LinkedList<CvSurfFeature> descriptors = s.computeSurfFeatures(image); Field[] result = new Field[descriptors.size()]; int count = 0; for (Iterator<CvSurfFeature> cvsf = descriptors.iterator(); cvsf.hasNext(); ) { CvSurfFeature sf = cvsf.next(); result[count] = (new StoredField(DocumentBuilder.FIELD_NAME_CVSURF, sf.getByteArrayRepresentation())); count++; } return result; }
47. CvSiftDocumentBuilder#createDescriptorFields()
Project: LIRE
File: CvSiftDocumentBuilder.java
@Override public Field[] createDescriptorFields(BufferedImage image) { // extract descriptors from image: LinkedList<CvSiftFeature> descriptors = extractor.computeSiftFeatures(image); Field[] result = new Field[descriptors.size()]; int count = 0; // create new document: for (Iterator<CvSiftFeature> cvsf = descriptors.iterator(); cvsf.hasNext(); ) { CvSiftFeature f = cvsf.next(); result[count] = new StoredField(DocumentBuilder.FIELD_NAME_CVSIFT, f.getByteArrayRepresentation()); count++; } return result; }
48. AbstractDocumentBuilder#createDocument()
Project: LIRE
File: AbstractDocumentBuilder.java
@Override public Document createDocument(BufferedImage image, String identifier) throws FileNotFoundException { assert (image != null); Document doc = new Document(); if (identifier != null) { doc.add(new StringField(DocumentBuilder.FIELD_NAME_IDENTIFIER, identifier, Field.Store.YES)); } Field[] fields = createDescriptorFields(image); for (int i = 0; i < fields.length; i++) { doc.add(fields[i]); } return doc; }
49. SpatialIndexLucene#doc()
Project: jena
File: SpatialIndexLucene.java
private Document doc(String entityURI, Shape... shapes) { Document doc = new Document(); Field entField = new Field(docDef.getEntityField(), entityURI, ftIRI); doc.add(entField); for (Shape shape : shapes) { for (IndexableField f : strategy.createIndexableFields(shape)) { doc.add(f); } } return doc; }
50. IndexAugmentorFactoryTest#validateComposedFields()
Project: jackrabbit-oak
File: IndexAugmentorFactoryTest.java
void validateComposedFields(String type, String... expected) { IndexFieldProvider compositeIndexProvider = indexAugmentorFactory.getIndexFieldProvider(type); if (expected.length > 0) { assertTrue("Composed index field provider doesn't declare correct supported type", compositeIndexProvider.getSupportedTypes().contains(type)); } Iterable<Field> fields = compositeIndexProvider.getAugmentedFields(null, null, null); Set<String> ids = Sets.newHashSet(); for (Field f : fields) { ids.add(f.stringValue()); } assertEquals(expected.length, Iterables.size(ids)); assertThat(ids, CoreMatchers.hasItems(expected)); }
51. LsiIndexer#add()
Project: indextank-engine
File: LsiIndexer.java
/** *@inheritDoc */ public synchronized void add(final String docId, final Document itdoc) { if (null == docId) { logger.error("No documentId specified. Ignoring addition."); return; } org.apache.lucene.document.Document doc = asLuceneDocument(itdoc); org.apache.lucene.document.Field docidPayloadField = new org.apache.lucene.document.Field(LsiIndex.PAYLOAD_TERM_FIELD, docId, Field.Store.NO, Field.Index.ANALYZED); doc.add(docidPayloadField); doc.add(new Field("documentId", docId, Field.Store.NO, Field.Index.NOT_ANALYZED)); try { if (logger.isDebugEnabled()) { logger.debug("Adding document with docId=" + docId + ". Doc is " + itdoc.getFieldNames()); } writer.updateDocument(docIdTerm(docId), doc); } catch (IOException e) { logger.error(e); } }
52. SecureRealTimeGetComponent#getFilteredInternalDocId()
Project: incubator-sentry
File: SecureRealTimeGetComponent.java
/** * @param doc SolrDocument to check * @param idField field where the id is stored * @param fieldType type of id field * @param filterQuery Query to filter by * @param searcher SolrIndexSearcher on which to apply the filter query * @returns the internal docid, or -1 if doc is not found or doesn't match filter */ private static int getFilteredInternalDocId(SolrDocument doc, SchemaField idField, FieldType fieldType, Query filterQuery, SolrIndexSearcher searcher) throws IOException { int docid = -1; Field f = (Field) doc.getFieldValue(idField.getName()); String idStr = f.stringValue(); BytesRef idBytes = new BytesRef(); fieldType.readableToIndexed(idStr, idBytes); // get the internal document id long segAndId = searcher.lookupId(idBytes); // if docid is valid, run it through the filter if (segAndId >= 0) { int segid = (int) segAndId; AtomicReaderContext ctx = searcher.getTopReaderContext().leaves().get((int) (segAndId >> 32)); docid = segid + ctx.docBase; Weight weight = filterQuery.createWeight(searcher); Scorer scorer = weight.scorer(ctx, null); if (scorer == null || segid != scorer.advance(segid)) { // filter doesn't match. docid = -1; } } return docid; }
53. SourceSimpleFragmentsBuilder#getFields()
Project: elasticsearch
File: SourceSimpleFragmentsBuilder.java
@Override protected Field[] getFields(IndexReader reader, int docId, String fieldName) throws IOException { // we know its low level reader, and matching docId, since that's how we call the highlighter with SourceLookup sourceLookup = searchContext.lookup().source(); sourceLookup.setSegmentAndDocument((LeafReaderContext) reader.getContext(), docId); List<Object> values = sourceLookup.extractRawValues(mapper.fieldType().name()); if (values.isEmpty()) { return EMPTY_FIELDS; } Field[] fields = new Field[values.size()]; for (int i = 0; i < values.size(); i++) { fields[i] = new Field(mapper.fieldType().name(), values.get(i).toString(), TextField.TYPE_NOT_STORED); } return fields; }
54. SourceScoreOrderFragmentsBuilder#getFields()
Project: elasticsearch
File: SourceScoreOrderFragmentsBuilder.java
@Override protected Field[] getFields(IndexReader reader, int docId, String fieldName) throws IOException { // we know its low level reader, and matching docId, since that's how we call the highlighter with SourceLookup sourceLookup = searchContext.lookup().source(); sourceLookup.setSegmentAndDocument((LeafReaderContext) reader.getContext(), docId); List<Object> values = sourceLookup.extractRawValues(mapper.fieldType().name()); Field[] fields = new Field[values.size()]; for (int i = 0; i < values.size(); i++) { fields[i] = new Field(mapper.fieldType().name(), values.get(i).toString(), TextField.TYPE_NOT_STORED); } return fields; }
55. PointVectorStrategy#createIndexableFields()
Project: lucene-solr
File: PointVectorStrategy.java
/** @see #createIndexableFields(org.locationtech.spatial4j.shape.Shape) */ public Field[] createIndexableFields(Point point) { Field[] fields = new Field[fieldsLen]; int idx = -1; if (hasStored) { fields[++idx] = new StoredField(fieldNameX, point.getX()); fields[++idx] = new StoredField(fieldNameY, point.getY()); } if (hasDocVals) { fields[++idx] = new DoubleDocValuesField(fieldNameX, point.getX()); fields[++idx] = new DoubleDocValuesField(fieldNameY, point.getY()); } if (hasPointVals) { fields[++idx] = new DoublePoint(fieldNameX, point.getX()); fields[++idx] = new DoublePoint(fieldNameY, point.getY()); } if (legacyNumericFieldType != null) { fields[++idx] = new LegacyDoubleField(fieldNameX, point.getX(), legacyNumericFieldType); fields[++idx] = new LegacyDoubleField(fieldNameY, point.getY(), legacyNumericFieldType); } assert idx == fields.length - 1; return fields; }
56. TestIDVersionPostingsFormat#testCannotIndexTermVectors()
Project: lucene-solr
File: TestIDVersionPostingsFormat.java
// LUCENE-5693: because CheckIndex cross-checks term vectors with postings even for deleted docs, and because our PF only indexes the // non-deleted documents on flush, CheckIndex will see this as corruption: public void testCannotIndexTermVectors() throws Exception { Directory dir = newDirectory(); IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random())); iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat())); RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); Document doc = new Document(); FieldType ft = new FieldType(StringAndPayloadField.TYPE); ft.setStoreTermVectors(true); SingleTokenWithPayloadTokenStream ts = new SingleTokenWithPayloadTokenStream(); BytesRef payload = new BytesRef(8); payload.length = 8; IDVersionPostingsFormat.longToBytes(17, payload); ts.setValue("foo", payload); Field field = new Field("id", ts, ft); doc.add(new Field("id", ts, ft)); expectThrows(IllegalArgumentException.class, () -> { w.addDocument(doc); w.commit(); fail("didn't hit expected exception"); }); w.close(); dir.close(); }
57. TestQueryParser#isAHit()
Project: lucene-solr
File: TestQueryParser.java
private boolean isAHit(Query q, String content, Analyzer analyzer) throws IOException { Directory ramDir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), ramDir, analyzer); Document doc = new Document(); FieldType fieldType = new FieldType(); fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); fieldType.setTokenized(true); fieldType.setStored(true); Field field = new Field(FIELD, content, fieldType); doc.add(field); writer.addDocument(doc); writer.close(); DirectoryReader ir = DirectoryReader.open(ramDir); IndexSearcher is = new IndexSearcher(ir); int hits = is.search(q, 10).totalHits; ir.close(); ramDir.close(); if (hits == 1) { return true; } else { return false; } }
58. TestBoostedQuery#beforeClass()
Project: lucene-solr
File: TestBoostedQuery.java
@BeforeClass public static void beforeClass() throws Exception { dir = newDirectory(); IndexWriterConfig iwConfig = newIndexWriterConfig(new MockAnalyzer(random())); iwConfig.setMergePolicy(newLogMergePolicy()); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConfig); Document document = new Document(); Field idField = new SortedDocValuesField("id", new BytesRef()); document.add(idField); iw.addDocument(document); ir = iw.getReader(); is = newSearcher(ir); iw.close(); }
59. AllGroupHeadsCollectorTest#addGroupField()
Project: lucene-solr
File: AllGroupHeadsCollectorTest.java
private void addGroupField(Document doc, String groupField, String value, DocValuesType valueType) { Field valuesField = null; switch(valueType) { case BINARY: valuesField = new BinaryDocValuesField(groupField, new BytesRef(value)); break; case SORTED: valuesField = new SortedDocValuesField(groupField, new BytesRef(value)); break; default: fail("unhandled type"); } doc.add(valuesField); }
60. SourceSimpleFragmentsBuilder#getFields()
Project: elassandra
File: SourceSimpleFragmentsBuilder.java
@Override protected Field[] getFields(IndexReader reader, int docId, String fieldName) throws IOException { // we know its low level reader, and matching docId, since that's how we call the highlighter with SourceLookup sourceLookup = searchContext.lookup().source(); sourceLookup.setSegmentAndDocument((LeafReaderContext) reader.getContext(), docId); List<Object> values = sourceLookup.extractRawValues(hitContext.getSourcePath(mapper.fieldType().names().fullName())); if (values.isEmpty()) { return EMPTY_FIELDS; } Field[] fields = new Field[values.size()]; for (int i = 0; i < values.size(); i++) { fields[i] = new Field(mapper.fieldType().names().indexName(), values.get(i).toString(), TextField.TYPE_NOT_STORED); } return fields; }
61. SourceScoreOrderFragmentsBuilder#getFields()
Project: elassandra
File: SourceScoreOrderFragmentsBuilder.java
@Override protected Field[] getFields(IndexReader reader, int docId, String fieldName) throws IOException { // we know its low level reader, and matching docId, since that's how we call the highlighter with SourceLookup sourceLookup = searchContext.lookup().source(); sourceLookup.setSegmentAndDocument((LeafReaderContext) reader.getContext(), docId); List<Object> values = sourceLookup.extractRawValues(hitContext.getSourcePath(mapper.fieldType().names().fullName())); Field[] fields = new Field[values.size()]; for (int i = 0; i < values.size(); i++) { fields[i] = new Field(mapper.fieldType().names().indexName(), values.get(i).toString(), TextField.TYPE_NOT_STORED); } return fields; }
62. ParentFieldMapper#createField()
Project: elassandra
File: ParentFieldMapper.java
@Override public void createField(ParseContext context, Object value) throws IOException { String parentId = (String) value; boolean parent = context.docMapper().isParent(context.type()); if (parent) { addJoinFieldIfNeeded(context, parentJoinFieldType, context.id()); } if (!active()) { return; } Field field = new Field(fieldType().names().indexName(), Uid.createUid(context.stringBuilder(), parentType, parentId), fieldType()); setBoost(field); context.doc().add(field); addJoinFieldIfNeeded(context, childJoinFieldType, parentId); // we have parent mapping, yet no value was set, ignore it... }
63. LuceneResultSetRow#getChildAssocRef()
Project: community-edition
File: LuceneResultSetRow.java
@Override public ChildAssociationRef getChildAssocRef() { Field field = getDocument().getField("PRIMARYPARENT"); String primaryParent = null; if (field != null) { primaryParent = field.stringValue(); } NodeRef childNodeRef = getNodeRef(); NodeRef parentNodeRef = primaryParent == null ? null : tenantService.getBaseName(new NodeRef(primaryParent)); return new ChildAssociationRef(getPrimaryAssocTypeQName(), parentNodeRef, getQName(), childNodeRef); }
64. LuceneResultSetRow#getPrimaryAssocTypeQName()
Project: community-edition
File: LuceneResultSetRow.java
public QName getPrimaryAssocTypeQName() { Field field = getDocument().getField("PRIMARYASSOCTYPEQNAME"); if (field != null) { String qname = field.stringValue(); return QName.createQName(qname); } else { return ContentModel.ASSOC_CHILDREN; } }
65. LuceneResultSetRow#getQName()
Project: community-edition
File: LuceneResultSetRow.java
public QName getQName() { Field field = getDocument().getField("QNAME"); if (field != null) { String qname = field.stringValue(); if ((qname == null) || (qname.length() == 0)) { return null; } else { return QName.createQName(qname); } } else { return null; } }
66. TestAutomatonQueryUnicode#setUp()
Project: lucene-solr
File: TestAutomatonQueryUnicode.java
@Override public void setUp() throws Exception { super.setUp(); directory = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), directory); Document doc = new Document(); Field titleField = newTextField("title", "some title", Field.Store.NO); Field field = newTextField(FN, "", Field.Store.NO); Field footerField = newTextField("footer", "a footer", Field.Store.NO); doc.add(titleField); doc.add(field); doc.add(footerField); field.setStringValue("??abcdef"); writer.addDocument(doc); field.setStringValue("??ghijkl"); writer.addDocument(doc); // this sorts before the previous two in UTF-8/UTF-32, but after in UTF-16!!! field.setStringValue("?mnopqr"); writer.addDocument(doc); // this one too. field.setStringValue("?stuvwx"); writer.addDocument(doc); field.setStringValue("a?bc"); writer.addDocument(doc); field.setStringValue("abc"); writer.addDocument(doc); field.setStringValue("a?bc"); writer.addDocument(doc); field.setStringValue("a?bc"); writer.addDocument(doc); field.setStringValue("bacadaba"); writer.addDocument(doc); field.setStringValue(""); writer.addDocument(doc); field.setStringValue("??"); writer.addDocument(doc); field.setStringValue(""); writer.addDocument(doc); reader = writer.getReader(); searcher = newSearcher(reader); writer.close(); }
67. BaseTestRangeFilter#build()
Project: lucene-solr
File: BaseTestRangeFilter.java
private static IndexReader build(Random random, TestIndex index) throws IOException { /* build an index */ Document doc = new Document(); Field idField = newStringField(random, "id", "", Field.Store.YES); Field idDVField = new SortedDocValuesField("id", new BytesRef()); Field intIdField = new IntPoint("id_int", 0); Field intDVField = new NumericDocValuesField("id_int", 0); Field floatIdField = new FloatPoint("id_float", 0); Field floatDVField = new NumericDocValuesField("id_float", 0); Field longIdField = new LongPoint("id_long", 0); Field longDVField = new NumericDocValuesField("id_long", 0); Field doubleIdField = new DoublePoint("id_double", 0); Field doubleDVField = new NumericDocValuesField("id_double", 0); Field randField = newStringField(random, "rand", "", Field.Store.YES); Field randDVField = new SortedDocValuesField("rand", new BytesRef()); Field bodyField = newStringField(random, "body", "", Field.Store.NO); Field bodyDVField = new SortedDocValuesField("body", new BytesRef()); doc.add(idField); doc.add(idDVField); doc.add(intIdField); doc.add(intDVField); doc.add(floatIdField); doc.add(floatDVField); doc.add(longIdField); doc.add(longDVField); doc.add(doubleIdField); doc.add(doubleDVField); doc.add(randField); doc.add(randDVField); doc.add(bodyField); doc.add(bodyDVField); RandomIndexWriter writer = new RandomIndexWriter(random, index.index, newIndexWriterConfig(random, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(TestUtil.nextInt(random, 50, 1000)).setMergePolicy(newLogMergePolicy())); TestUtil.reduceOpenFiles(writer.w); while (true) { int minCount = 0; int maxCount = 0; for (int d = minId; d <= maxId; d++) { idField.setStringValue(pad(d)); idDVField.setBytesValue(new BytesRef(pad(d))); intIdField.setIntValue(d); intDVField.setLongValue(d); floatIdField.setFloatValue(d); floatDVField.setLongValue(Float.floatToRawIntBits(d)); longIdField.setLongValue(d); longDVField.setLongValue(d); doubleIdField.setDoubleValue(d); 
doubleDVField.setLongValue(Double.doubleToRawLongBits(d)); int r = index.allowNegativeRandomInts ? random.nextInt() : random.nextInt(Integer.MAX_VALUE); if (index.maxR < r) { index.maxR = r; maxCount = 1; } else if (index.maxR == r) { maxCount++; } if (r < index.minR) { index.minR = r; minCount = 1; } else if (r == index.minR) { minCount++; } randField.setStringValue(pad(r)); randDVField.setBytesValue(new BytesRef(pad(r))); bodyField.setStringValue("body"); bodyDVField.setBytesValue(new BytesRef("body")); writer.addDocument(doc); } if (minCount == 1 && maxCount == 1) { // our subclasses rely on only 1 doc having the min or // max, so, we loop until we satisfy that. it should be // exceedingly rare (Yonik calculates 1 in ~429,000) // times) that this loop requires more than one try: IndexReader ir = writer.getReader(); writer.close(); return ir; } // try again writer.deleteAll(); } }
68. DocMaker#createDocument()
Project: lucene-solr
File: DocMaker.java
// create a doc // use only part of the body, modify it to keep the rest (or use all if size==0). // reset the docdata properties so they are not added more than once. private Document createDocument(DocData docData, int size, int cnt) throws UnsupportedEncodingException { final DocState ds = getDocState(); final Document doc = reuseFields ? ds.doc : new Document(); doc.clear(); // Set ID_FIELD FieldType ft = new FieldType(valType); ft.setStored(true); Field idField = ds.getField(ID_FIELD, ft); int id; if (r != null) { id = r.nextInt(updateDocIDLimit); } else { id = docData.getID(); if (id == -1) { id = numDocsCreated.getAndIncrement(); } } idField.setStringValue(Integer.toString(id)); doc.add(idField); // Set NAME_FIELD String name = docData.getName(); if (name == null) name = ""; name = cnt < 0 ? name : name + "_" + cnt; Field nameField = ds.getField(NAME_FIELD, valType); nameField.setStringValue(name); doc.add(nameField); // Set DATE_FIELD DateUtil util = dateParsers.get(); if (util == null) { util = new DateUtil(); dateParsers.set(util); } Date date = null; String dateString = docData.getDate(); if (dateString != null) { util.pos.setIndex(0); date = util.parser.parse(dateString, util.pos); //System.out.println(dateString + " parsed to " + date); } else { dateString = ""; } Field dateStringField = ds.getField(DATE_FIELD, valType); dateStringField.setStringValue(dateString); doc.add(dateStringField); if (date == null) { // just set to right now date = new Date(); } Field dateField = ds.getNumericField(DATE_MSEC_FIELD, Long.class); dateField.setLongValue(date.getTime()); doc.add(dateField); util.cal.setTime(date); final int sec = util.cal.get(Calendar.HOUR_OF_DAY) * 3600 + util.cal.get(Calendar.MINUTE) * 60 + util.cal.get(Calendar.SECOND); Field timeSecField = ds.getNumericField(TIME_SEC_FIELD, Integer.class); timeSecField.setIntValue(sec); doc.add(timeSecField); // Set TITLE_FIELD String title = docData.getTitle(); Field titleField = ds.getField(TITLE_FIELD, 
valType); titleField.setStringValue(title == null ? "" : title); doc.add(titleField); String body = docData.getBody(); if (body != null && body.length() > 0) { String bdy; if (size <= 0 || size >= body.length()) { // use all bdy = body; // nothing left docData.setBody(""); } else { // attempt not to break words - if whitespace found within next 20 chars... for (int n = size - 1; n < size + 20 && n < body.length(); n++) { if (Character.isWhitespace(body.charAt(n))) { size = n; break; } } // use part bdy = body.substring(0, size); // some left docData.setBody(body.substring(size)); } Field bodyField = ds.getField(BODY_FIELD, bodyValType); bodyField.setStringValue(bdy); doc.add(bodyField); if (storeBytes) { Field bytesField = ds.getField(BYTES_FIELD, StringField.TYPE_STORED); bytesField.setBytesValue(bdy.getBytes(StandardCharsets.UTF_8)); doc.add(bytesField); } } if (indexProperties) { Properties props = docData.getProps(); if (props != null) { for (final Map.Entry<Object, Object> entry : props.entrySet()) { Field f = ds.getField((String) entry.getKey(), valType); f.setStringValue((String) entry.getValue()); doc.add(f); } docData.setProps(null); } } //System.out.println("============== Created doc "+numDocsCreated+" :\n"+doc+"\n=========="); return doc; }
69. TestValueSources#beforeClass()
Project: lucene-solr
File: TestValueSources.java
@BeforeClass public static void beforeClass() throws Exception { dir = newDirectory(); analyzer = new MockAnalyzer(random()); IndexWriterConfig iwConfig = newIndexWriterConfig(analyzer); iwConfig.setMergePolicy(newLogMergePolicy()); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConfig); Document document = new Document(); Field idField = new StringField("id", "", Field.Store.NO); document.add(idField); Field idDVField = new SortedDocValuesField("id", new BytesRef()); document.add(idDVField); Field doubleDVField = new NumericDocValuesField("double", 0); document.add(doubleDVField); Field floatDVField = new NumericDocValuesField("float", 0); document.add(floatDVField); Field intDVField = new NumericDocValuesField("int", 0); document.add(intDVField); Field longDVField = new NumericDocValuesField("long", 0); document.add(longDVField); Field stringField = new StringField("string", "", Field.Store.NO); document.add(stringField); Field stringDVField = new SortedDocValuesField("string", new BytesRef()); document.add(stringDVField); Field textField = new TextField("text", "", Field.Store.NO); document.add(textField); for (String[] doc : documents) { idField.setStringValue(doc[0]); idDVField.setBytesValue(new BytesRef(doc[0])); doubleDVField.setLongValue(Double.doubleToRawLongBits(Double.valueOf(doc[1]))); floatDVField.setLongValue(Float.floatToRawIntBits(Float.valueOf(doc[2]))); intDVField.setLongValue(Integer.valueOf(doc[3])); longDVField.setLongValue(Long.valueOf(doc[4])); stringField.setStringValue(doc[5]); stringDVField.setBytesValue(new BytesRef(doc[5])); textField.setStringValue(doc[6]); iw.addDocument(document); } reader = iw.getReader(); searcher = newSearcher(reader); iw.close(); }
70. TestDocValuesScoring#testSimple()
Project: lucene-solr
File: TestDocValuesScoring.java
/* for comparing floats */ public void testSimple() throws Exception { Directory dir = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), dir); Document doc = new Document(); Field field = newTextField("foo", "", Field.Store.NO); doc.add(field); Field dvField = new FloatDocValuesField("foo_boost", 0.0F); doc.add(dvField); Field field2 = newTextField("bar", "", Field.Store.NO); doc.add(field2); field.setStringValue("quick brown fox"); field2.setStringValue("quick brown fox"); // boost x2 dvField.setFloatValue(2f); iw.addDocument(doc); field.setStringValue("jumps over lazy brown dog"); field2.setStringValue("jumps over lazy brown dog"); // boost x4 dvField.setFloatValue(4f); iw.addDocument(doc); IndexReader ir = iw.getReader(); iw.close(); // no boosting IndexSearcher searcher1 = newSearcher(ir, false); final Similarity base = searcher1.getSimilarity(true); // boosting IndexSearcher searcher2 = newSearcher(ir, false); searcher2.setSimilarity(new PerFieldSimilarityWrapper() { final Similarity fooSim = new BoostingSimilarity(base, "foo_boost"); @Override public Similarity get(String field) { return "foo".equals(field) ? fooSim : base; } }); // in this case, we searched on field "foo". first document should have 2x the score. TermQuery tq = new TermQuery(new Term("foo", "quick")); QueryUtils.check(random(), tq, searcher1); QueryUtils.check(random(), tq, searcher2); TopDocs noboost = searcher1.search(tq, 10); TopDocs boost = searcher2.search(tq, 10); assertEquals(1, noboost.totalHits); assertEquals(1, boost.totalHits); //System.out.println(searcher2.explain(tq, boost.scoreDocs[0].doc)); assertEquals(boost.scoreDocs[0].score, noboost.scoreDocs[0].score * 2f, SCORE_EPSILON); // this query matches only the second document, which should have 4x the score. 
tq = new TermQuery(new Term("foo", "jumps")); QueryUtils.check(random(), tq, searcher1); QueryUtils.check(random(), tq, searcher2); noboost = searcher1.search(tq, 10); boost = searcher2.search(tq, 10); assertEquals(1, noboost.totalHits); assertEquals(1, boost.totalHits); assertEquals(boost.scoreDocs[0].score, noboost.scoreDocs[0].score * 4f, SCORE_EPSILON); // search on on field bar just for kicks, nothing should happen, since we setup // our sim provider to only use foo_boost for field foo. tq = new TermQuery(new Term("bar", "quick")); QueryUtils.check(random(), tq, searcher1); QueryUtils.check(random(), tq, searcher2); noboost = searcher1.search(tq, 10); boost = searcher2.search(tq, 10); assertEquals(1, noboost.totalHits); assertEquals(1, boost.totalHits); assertEquals(boost.scoreDocs[0].score, noboost.scoreDocs[0].score, SCORE_EPSILON); ir.close(); dir.close(); }
71. TestOmitPositions#testPositions()
Project: lucene-solr
File: TestOmitPositions.java
// Tests whether the DocumentWriter correctly enables the
// omitTermFreqAndPositions bit in the FieldInfo.
// Indexes the same nine fields twice with conflicting IndexOptions and checks
// that merging always demotes each field to the weaker of the two options.
public void testPositions() throws Exception {
  Directory ram = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig(analyzer));
  Document d = new Document();
  // f1,f2,f3: docs only
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setIndexOptions(IndexOptions.DOCS);
  Field f1 = newField("f1", "This field has docs only", ft);
  d.add(f1);
  Field f2 = newField("f2", "This field has docs only", ft);
  d.add(f2);
  Field f3 = newField("f3", "This field has docs only", ft);
  d.add(f3);
  FieldType ft2 = new FieldType(TextField.TYPE_NOT_STORED);
  ft2.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  // f4,f5,f6 docs and freqs
  Field f4 = newField("f4", "This field has docs and freqs", ft2);
  d.add(f4);
  Field f5 = newField("f5", "This field has docs and freqs", ft2);
  d.add(f5);
  Field f6 = newField("f6", "This field has docs and freqs", ft2);
  d.add(f6);
  FieldType ft3 = new FieldType(TextField.TYPE_NOT_STORED);
  ft3.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
  // f7,f8,f9 docs/freqs/positions
  Field f7 = newField("f7", "This field has docs and freqs and positions", ft3);
  d.add(f7);
  Field f8 = newField("f8", "This field has docs and freqs and positions", ft3);
  d.add(f8);
  Field f9 = newField("f9", "This field has docs and freqs and positions", ft3);
  d.add(f9);
  writer.addDocument(d);
  writer.forceMerge(1);
  // now we add another document which has docs-only for f1, f4, f7, docs/freqs for f2, f5, f8,
  // and docs/freqs/positions for f3, f6, f9
  d = new Document();
  // f1,f4,f7: docs only
  f1 = newField("f1", "This field has docs only", ft);
  d.add(f1);
  f4 = newField("f4", "This field has docs only", ft);
  d.add(f4);
  f7 = newField("f7", "This field has docs only", ft);
  d.add(f7);
  // f2, f5, f8: docs and freqs
  f2 = newField("f2", "This field has docs and freqs", ft2);
  d.add(f2);
  f5 = newField("f5", "This field has docs and freqs", ft2);
  d.add(f5);
  f8 = newField("f8", "This field has docs and freqs", ft2);
  d.add(f8);
  // f3, f6, f9: docs and freqs and positions
  f3 = newField("f3", "This field has docs and freqs and positions", ft3);
  d.add(f3);
  f6 = newField("f6", "This field has docs and freqs and positions", ft3);
  d.add(f6);
  f9 = newField("f9", "This field has docs and freqs and positions", ft3);
  d.add(f9);
  writer.addDocument(d);
  // force merge
  writer.forceMerge(1);
  // flush
  writer.close();
  LeafReader reader = getOnlyLeafReader(DirectoryReader.open(ram));
  FieldInfos fi = reader.getFieldInfos();
  // each assertion below documents: options(doc 1) + options(doc 2) = merged options
  // docs + docs = docs
  assertEquals(IndexOptions.DOCS, fi.fieldInfo("f1").getIndexOptions());
  // docs + docs/freqs = docs
  assertEquals(IndexOptions.DOCS, fi.fieldInfo("f2").getIndexOptions());
  // docs + docs/freqs/pos = docs
  assertEquals(IndexOptions.DOCS, fi.fieldInfo("f3").getIndexOptions());
  // docs/freqs + docs = docs
  assertEquals(IndexOptions.DOCS, fi.fieldInfo("f4").getIndexOptions());
  // docs/freqs + docs/freqs = docs/freqs
  assertEquals(IndexOptions.DOCS_AND_FREQS, fi.fieldInfo("f5").getIndexOptions());
  // docs/freqs + docs/freqs/pos = docs/freqs
  assertEquals(IndexOptions.DOCS_AND_FREQS, fi.fieldInfo("f6").getIndexOptions());
  // docs/freqs/pos + docs = docs
  assertEquals(IndexOptions.DOCS, fi.fieldInfo("f7").getIndexOptions());
  // docs/freqs/pos + docs/freqs = docs/freqs
  assertEquals(IndexOptions.DOCS_AND_FREQS, fi.fieldInfo("f8").getIndexOptions());
  // docs/freqs/pos + docs/freqs/pos = docs/freqs/pos
  assertEquals(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, fi.fieldInfo("f9").getIndexOptions());
  reader.close();
  ram.close();
}
72. TestIndexWriter#testIndexStoreCombos()
Project: lucene-solr
File: TestIndexWriter.java
public void testIndexStoreCombos() throws Exception { Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); byte[] b = new byte[50]; for (int i = 0; i < 50; i++) b[i] = (byte) (i + 77); Document doc = new Document(); FieldType customType = new FieldType(StoredField.TYPE); customType.setTokenized(true); Field f = new Field("binary", b, 10, 17, customType); // TODO: this is evil, changing the type after creating the field: customType.setIndexOptions(IndexOptions.DOCS); final MockTokenizer doc1field1 = new MockTokenizer(MockTokenizer.WHITESPACE, false); doc1field1.setReader(new StringReader("doc1field1")); f.setTokenStream(doc1field1); FieldType customType2 = new FieldType(TextField.TYPE_STORED); Field f2 = newField("string", "value", customType2); final MockTokenizer doc1field2 = new MockTokenizer(MockTokenizer.WHITESPACE, false); doc1field2.setReader(new StringReader("doc1field2")); f2.setTokenStream(doc1field2); doc.add(f); doc.add(f2); w.addDocument(doc); // add 2 docs to test in-memory merging final MockTokenizer doc2field1 = new MockTokenizer(MockTokenizer.WHITESPACE, false); doc2field1.setReader(new StringReader("doc2field1")); f.setTokenStream(doc2field1); final MockTokenizer doc2field2 = new MockTokenizer(MockTokenizer.WHITESPACE, false); doc2field2.setReader(new StringReader("doc2field2")); f2.setTokenStream(doc2field2); w.addDocument(doc); // force segment flush so we can force a segment merge with doc3 later. w.commit(); final MockTokenizer doc3field1 = new MockTokenizer(MockTokenizer.WHITESPACE, false); doc3field1.setReader(new StringReader("doc3field1")); f.setTokenStream(doc3field1); final MockTokenizer doc3field2 = new MockTokenizer(MockTokenizer.WHITESPACE, false); doc3field2.setReader(new StringReader("doc3field2")); f2.setTokenStream(doc3field2); w.addDocument(doc); w.commit(); // force segment merge. 
w.forceMerge(1); w.close(); IndexReader ir = DirectoryReader.open(dir); Document doc2 = ir.document(0); IndexableField f3 = doc2.getField("binary"); b = f3.binaryValue().bytes; assertTrue(b != null); assertEquals(17, b.length, 17); assertEquals(87, b[0]); assertTrue(ir.document(0).getField("binary").binaryValue() != null); assertTrue(ir.document(1).getField("binary").binaryValue() != null); assertTrue(ir.document(2).getField("binary").binaryValue() != null); assertEquals("value", ir.document(0).get("string")); assertEquals("value", ir.document(1).get("string")); assertEquals("value", ir.document(2).get("string")); // test that the terms were indexed. assertTrue(TestUtil.docs(random(), ir, "binary", new BytesRef("doc1field1"), null, PostingsEnum.NONE).nextDoc() != DocIdSetIterator.NO_MORE_DOCS); assertTrue(TestUtil.docs(random(), ir, "binary", new BytesRef("doc2field1"), null, PostingsEnum.NONE).nextDoc() != DocIdSetIterator.NO_MORE_DOCS); assertTrue(TestUtil.docs(random(), ir, "binary", new BytesRef("doc3field1"), null, PostingsEnum.NONE).nextDoc() != DocIdSetIterator.NO_MORE_DOCS); assertTrue(TestUtil.docs(random(), ir, "string", new BytesRef("doc1field2"), null, PostingsEnum.NONE).nextDoc() != DocIdSetIterator.NO_MORE_DOCS); assertTrue(TestUtil.docs(random(), ir, "string", new BytesRef("doc2field2"), null, PostingsEnum.NONE).nextDoc() != DocIdSetIterator.NO_MORE_DOCS); assertTrue(TestUtil.docs(random(), ir, "string", new BytesRef("doc3field2"), null, PostingsEnum.NONE).nextDoc() != DocIdSetIterator.NO_MORE_DOCS); ir.close(); dir.close(); }
73. TestBlockPostingsFormat3#test()
Project: lucene-solr
File: TestBlockPostingsFormat3.java
// creates 8 fields with different options and does "duels" of fields against each other
// Every field receives identical text per document, so verify(dir) can compare
// postings across fields that only differ in IndexOptions / payload filters.
public void test() throws Exception {
  Directory dir = newDirectory();
  // per-field analyzer: field name decides whether payloads are attached
  Analyzer analyzer = new Analyzer(Analyzer.PER_FIELD_REUSE_STRATEGY) {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new MockTokenizer();
      if (fieldName.contains("payloadsFixed")) {
        TokenFilter filter = new MockFixedLengthPayloadFilter(new Random(0), tokenizer, 1);
        return new TokenStreamComponents(tokenizer, filter);
      } else if (fieldName.contains("payloadsVariable")) {
        TokenFilter filter = new MockVariableLengthPayloadFilter(new Random(0), tokenizer);
        return new TokenStreamComponents(tokenizer, filter);
      } else {
        return new TokenStreamComponents(tokenizer);
      }
    }
  };
  IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
  iwc.setCodec(TestUtil.alwaysPostingsFormat(new Lucene50PostingsFormat()));
  // TODO we could actually add more fields implemented with different PFs
  // or, just put this test into the usual rotation?
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
  Document doc = new Document();
  FieldType docsOnlyType = new FieldType(TextField.TYPE_NOT_STORED);
  // turn this on for a cross-check
  docsOnlyType.setStoreTermVectors(true);
  docsOnlyType.setIndexOptions(IndexOptions.DOCS);
  FieldType docsAndFreqsType = new FieldType(TextField.TYPE_NOT_STORED);
  // turn this on for a cross-check
  docsAndFreqsType.setStoreTermVectors(true);
  docsAndFreqsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  FieldType positionsType = new FieldType(TextField.TYPE_NOT_STORED);
  // turn these on for a cross-check
  positionsType.setStoreTermVectors(true);
  positionsType.setStoreTermVectorPositions(true);
  positionsType.setStoreTermVectorOffsets(true);
  positionsType.setStoreTermVectorPayloads(true);
  FieldType offsetsType = new FieldType(positionsType);
  offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  Field field1 = new Field("field1docs", "", docsOnlyType);
  Field field2 = new Field("field2freqs", "", docsAndFreqsType);
  Field field3 = new Field("field3positions", "", positionsType);
  Field field4 = new Field("field4offsets", "", offsetsType);
  Field field5 = new Field("field5payloadsFixed", "", positionsType);
  Field field6 = new Field("field6payloadsVariable", "", positionsType);
  Field field7 = new Field("field7payloadsFixedOffsets", "", offsetsType);
  Field field8 = new Field("field8payloadsVariableOffsets", "", offsetsType);
  doc.add(field1);
  doc.add(field2);
  doc.add(field3);
  doc.add(field4);
  doc.add(field5);
  doc.add(field6);
  doc.add(field7);
  doc.add(field8);
  for (int i = 0; i < MAXDOC; i++) {
    // mix of a unique term, a very common term, English words, and random noise
    String stringValue = Integer.toString(i) + " verycommon " + English.intToEnglish(i).replace('-', ' ') + " " + TestUtil.randomSimpleString(random());
    field1.setStringValue(stringValue);
    field2.setStringValue(stringValue);
    field3.setStringValue(stringValue);
    field4.setStringValue(stringValue);
    field5.setStringValue(stringValue);
    field6.setStringValue(stringValue);
    field7.setStringValue(stringValue);
    field8.setStringValue(stringValue);
    iw.addDocument(doc);
  }
  iw.close();
  verify(dir);
  // for some extra coverage, checkIndex before we forceMerge
  TestUtil.checkIndex(dir);
  iwc = newIndexWriterConfig(analyzer);
  iwc.setCodec(TestUtil.alwaysPostingsFormat(new Lucene50PostingsFormat()));
  iwc.setOpenMode(OpenMode.APPEND);
  IndexWriter iw2 = new IndexWriter(dir, iwc);
  iw2.forceMerge(1);
  iw2.close();
  // verify again after merging down to a single segment
  verify(dir);
  dir.close();
}
74. CustomPostingsHighlighterTests#testCustomPostingsHighlighter()
Project: elasticsearch
File: CustomPostingsHighlighterTests.java
// Indexes one document with four values for the multi-valued "body" field and
// checks that CustomPostingsHighlighter produces one snippet per value, with
// the query term wrapped in <b> tags by the passage formatter.
public void testCustomPostingsHighlighter() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  iwc.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
  // offsets must be indexed for the postings highlighter to work
  FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
  offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  //good position but only one match
  final String firstValue = "This is a test. Just a test1 highlighting from postings highlighter.";
  Field body = new Field("body", "", offsetsType);
  Document doc = new Document();
  doc.add(body);
  body.setStringValue(firstValue);
  //two matches, not the best snippet due to its length though
  final String secondValue = "This is the second highlighting value to perform highlighting on a longer text that gets scored lower.";
  Field body2 = new Field("body", "", offsetsType);
  doc.add(body2);
  body2.setStringValue(secondValue);
  //two matches and short, will be scored highest
  final String thirdValue = "This is highlighting the third short highlighting value.";
  Field body3 = new Field("body", "", offsetsType);
  doc.add(body3);
  body3.setStringValue(thirdValue);
  //one match, same as first but at the end, will be scored lower due to its position
  final String fourthValue = "Just a test4 highlighting from postings highlighter.";
  Field body4 = new Field("body", "", offsetsType);
  doc.add(body4);
  body4.setStringValue(fourthValue);
  iw.addDocument(doc);
  IndexReader ir = iw.getReader();
  iw.close();
  // expected snippets, in index (field value) order
  String firstHlValue = "Just a test1 <b>highlighting</b> from postings highlighter.";
  String secondHlValue = "This is the second <b>highlighting</b> value to perform <b>highlighting</b> on a longer text that gets scored lower.";
  String thirdHlValue = "This is <b>highlighting</b> the third short <b>highlighting</b> value.";
  String fourthHlValue = "Just a test4 <b>highlighting</b> from postings highlighter.";
  IndexSearcher searcher = newSearcher(ir);
  Query query = new TermQuery(new Term("body", "highlighting"));
  TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
  assertThat(topDocs.totalHits, equalTo(1));
  int docId = topDocs.scoreDocs[0].doc;
  // the highlighter is handed the raw field content joined by paragraph separators
  String fieldValue = firstValue + HighlightUtils.PARAGRAPH_SEPARATOR + secondValue + HighlightUtils.PARAGRAPH_SEPARATOR + thirdValue + HighlightUtils.PARAGRAPH_SEPARATOR + fourthValue;
  CustomPostingsHighlighter highlighter = new CustomPostingsHighlighter(null, new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder()), fieldValue, false);
  Snippet[] snippets = highlighter.highlightField("body", query, searcher, docId, 5);
  assertThat(snippets.length, equalTo(4));
  assertThat(snippets[0].getText(), equalTo(firstHlValue));
  assertThat(snippets[1].getText(), equalTo(secondHlValue));
  assertThat(snippets[2].getText(), equalTo(thirdHlValue));
  assertThat(snippets[3].getText(), equalTo(fourthHlValue));
  ir.close();
  dir.close();
}
75. BaseDocValuesFormatTestCase#testThreads()
Project: lucene-solr
File: BaseDocValuesFormatTestCase.java
/** Tests dv against stored fields with threads (binary/numeric/sorted, no missing) */
public void testThreads() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
  Document doc = new Document();
  // each doc value is mirrored into a stored field so readers can cross-check
  Field idField = new StringField("id", "", Field.Store.NO);
  Field storedBinField = new StoredField("storedBin", new byte[0]);
  Field dvBinField = new BinaryDocValuesField("dvBin", new BytesRef());
  Field dvSortedField = new SortedDocValuesField("dvSorted", new BytesRef());
  Field storedNumericField = new StoredField("storedNum", "");
  Field dvNumericField = new NumericDocValuesField("dvNum", 0);
  doc.add(idField);
  doc.add(storedBinField);
  doc.add(dvBinField);
  doc.add(dvSortedField);
  doc.add(storedNumericField);
  doc.add(dvNumericField);
  // index some docs
  int numDocs = atLeast(300);
  for (int i = 0; i < numDocs; i++) {
    idField.setStringValue(Integer.toString(i));
    int length = TestUtil.nextInt(random(), 0, 8);
    byte buffer[] = new byte[length];
    random().nextBytes(buffer);
    storedBinField.setBytesValue(buffer);
    dvBinField.setBytesValue(buffer);
    dvSortedField.setBytesValue(buffer);
    long numericValue = random().nextLong();
    storedNumericField.setStringValue(Long.toString(numericValue));
    dvNumericField.setLongValue(numericValue);
    writer.addDocument(doc);
    if (random().nextInt(31) == 0) {
      // occasional commit to exercise multiple segments
      writer.commit();
    }
  }
  // delete some docs
  int numDeletions = random().nextInt(numDocs / 10);
  for (int i = 0; i < numDeletions; i++) {
    int id = random().nextInt(numDocs);
    writer.deleteDocuments(new Term("id", Integer.toString(id)));
  }
  writer.close();
  // compare
  final DirectoryReader ir = DirectoryReader.open(dir);
  int numThreads = TestUtil.nextInt(random(), 2, 7);
  Thread threads[] = new Thread[numThreads];
  // latch released once all threads are started, so they read concurrently
  final CountDownLatch startingGun = new CountDownLatch(1);
  for (int i = 0; i < threads.length; i++) {
    threads[i] = new Thread() {
      @Override
      public void run() {
        try {
          startingGun.await();
          for (LeafReaderContext context : ir.leaves()) {
            LeafReader r = context.reader();
            BinaryDocValues binaries = r.getBinaryDocValues("dvBin");
            SortedDocValues sorted = r.getSortedDocValues("dvSorted");
            NumericDocValues numerics = r.getNumericDocValues("dvNum");
            for (int j = 0; j < r.maxDoc(); j++) {
              // doc values must agree with the stored-field mirror for every doc
              BytesRef binaryValue = r.document(j).getBinaryValue("storedBin");
              BytesRef scratch = binaries.get(j);
              assertEquals(binaryValue, scratch);
              scratch = sorted.get(j);
              assertEquals(binaryValue, scratch);
              String expected = r.document(j).get("storedNum");
              assertEquals(Long.parseLong(expected), numerics.get(j));
            }
          }
          TestUtil.checkReader(ir);
        } catch (Exception e) {
          // propagate so the failure surfaces in the joining test thread
          throw new RuntimeException(e);
        }
      }
    };
    threads[i].start();
  }
  startingGun.countDown();
  for (Thread t : threads) {
    t.join();
  }
  ir.close();
  dir.close();
}
76. TestDiversifiedTopDocsCollector#setUp()
Project: lucene-solr
File: TestDiversifiedTopDocsCollector.java
@Override public void setUp() throws Exception { super.setUp(); // populate an index with documents - artist, song and weeksAtNumberOne dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), dir); Document doc = new Document(); Field yearField = newTextField("year", "", Field.Store.NO); SortedDocValuesField artistField = new SortedDocValuesField("artist", new BytesRef("")); Field weeksAtNumberOneField = new FloatDocValuesField("weeksAtNumberOne", 0.0F); Field weeksStoredField = new LegacyFloatField("weeks", 0.0F, Store.YES); Field idField = newStringField("id", "", Field.Store.YES); Field songField = newTextField("song", "", Field.Store.NO); Field storedArtistField = newTextField("artistName", "", Field.Store.NO); doc.add(idField); doc.add(weeksAtNumberOneField); doc.add(storedArtistField); doc.add(songField); doc.add(weeksStoredField); doc.add(yearField); doc.add(artistField); parsedRecords.clear(); for (int i = 0; i < hitsOfThe60s.length; i++) { String cols[] = hitsOfThe60s[i].split("\t"); Record record = new Record(String.valueOf(i), cols[0], cols[1], cols[2], Float.valueOf(cols[3])); parsedRecords.put(record.id, record); idField.setStringValue(record.id); yearField.setStringValue(record.year); storedArtistField.setStringValue(record.artist); artistField.setBytesValue(new BytesRef(record.artist)); songField.setStringValue(record.song); weeksStoredField.setFloatValue(record.weeks); weeksAtNumberOneField.setFloatValue(record.weeks); writer.addDocument(doc); if (i % 10 == 0) { // Causes the creation of multiple segments for our test writer.commit(); } } reader = writer.getReader(); writer.close(); searcher = newSearcher(reader); artistDocValues = MultiDocValues.getSortedValues(reader, "artist"); // All searches sort by song popularity final Similarity base = searcher.getSimilarity(true); searcher.setSimilarity(new DocValueSimilarity(base, "weeksAtNumberOne")); }
77. TestPostingsHighlighter#testUserFailedToIndexOffsets()
Project: lucene-solr
File: TestPostingsHighlighter.java
public void testUserFailedToIndexOffsets() throws Exception { Directory dir = newDirectory(); IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true)); iwc.setMergePolicy(newLogMergePolicy()); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc); FieldType positionsType = new FieldType(TextField.TYPE_STORED); positionsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); Field body = new Field("body", "", positionsType); Field title = new StringField("title", "", Field.Store.YES); Document doc = new Document(); doc.add(body); doc.add(title); body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore."); title.setStringValue("test"); iw.addDocument(doc); body.setStringValue("This test is another test. Not a good sentence. Test test test test."); title.setStringValue("test"); iw.addDocument(doc); IndexReader ir = iw.getReader(); iw.close(); IndexSearcher searcher = newSearcher(ir); PostingsHighlighter highlighter = new PostingsHighlighter(); Query query = new TermQuery(new Term("body", "test")); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(2, topDocs.totalHits); expectThrows(IllegalArgumentException.class, () -> { highlighter.highlight("body", query, searcher, topDocs, 2); }); expectThrows(IllegalArgumentException.class, () -> { highlighter.highlight("title", new TermQuery(new Term("title", "test")), searcher, topDocs, 2); fail("did not hit expected exception"); }); ir.close(); dir.close(); }
78. TestPostingsHighlighter#testMultipleFields()
Project: lucene-solr
File: TestPostingsHighlighter.java
public void testMultipleFields() throws Exception { Directory dir = newDirectory(); IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true)); iwc.setMergePolicy(newLogMergePolicy()); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc); FieldType offsetsType = new FieldType(TextField.TYPE_STORED); offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); Field body = new Field("body", "", offsetsType); Field title = new Field("title", "", offsetsType); Document doc = new Document(); doc.add(body); doc.add(title); body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore."); title.setStringValue("I am hoping for the best."); iw.addDocument(doc); body.setStringValue("Highlighting the first term. Hope it works."); title.setStringValue("But best may not be good enough."); iw.addDocument(doc); IndexReader ir = iw.getReader(); iw.close(); IndexSearcher searcher = newSearcher(ir); PostingsHighlighter highlighter = new PostingsHighlighter(); BooleanQuery.Builder query = new BooleanQuery.Builder(); query.add(new TermQuery(new Term("body", "highlighting")), BooleanClause.Occur.SHOULD); query.add(new TermQuery(new Term("title", "best")), BooleanClause.Occur.SHOULD); TopDocs topDocs = searcher.search(query.build(), 10, Sort.INDEXORDER); assertEquals(2, topDocs.totalHits); Map<String, String[]> snippets = highlighter.highlightFields(new String[] { "body", "title" }, query.build(), searcher, topDocs); assertEquals(2, snippets.size()); assertEquals("Just a test <b>highlighting</b> from postings. ", snippets.get("body")[0]); assertEquals("<b>Highlighting</b> the first term. ", snippets.get("body")[1]); assertEquals("I am hoping for the <b>best</b>.", snippets.get("title")[0]); assertEquals("But <b>best</b> may not be good enough.", snippets.get("title")[1]); ir.close(); dir.close(); }
79. TestPayloadsOnVectors#testMixupMultiValued()
Project: lucene-solr
File: TestPayloadsOnVectors.java
/** some field instances have payload att, some not */ public void testMixupMultiValued() throws Exception { Directory dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), dir); Document doc = new Document(); FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); customType.setStoreTermVectors(true); customType.setStoreTermVectorPositions(true); customType.setStoreTermVectorPayloads(true); customType.setStoreTermVectorOffsets(random().nextBoolean()); Field field = new Field("field", "", customType); TokenStream ts = new MockTokenizer(MockTokenizer.WHITESPACE, true); ((Tokenizer) ts).setReader(new StringReader("here we go")); field.setTokenStream(ts); doc.add(field); Field field2 = new Field("field", "", customType); Token withPayload = new Token("withPayload", 0, 11); withPayload.setPayload(new BytesRef("test")); ts = new CannedTokenStream(withPayload); assertTrue(ts.hasAttribute(PayloadAttribute.class)); field2.setTokenStream(ts); doc.add(field2); Field field3 = new Field("field", "", customType); ts = new MockTokenizer(MockTokenizer.WHITESPACE, true); ((Tokenizer) ts).setReader(new StringReader("nopayload")); field3.setTokenStream(ts); doc.add(field3); writer.addDocument(doc); DirectoryReader reader = writer.getReader(); Terms terms = reader.getTermVector(0, "field"); assert terms != null; TermsEnum termsEnum = terms.iterator(); assertTrue(termsEnum.seekExact(new BytesRef("withPayload"))); PostingsEnum de = termsEnum.postings(null, PostingsEnum.ALL); assertEquals(0, de.nextDoc()); assertEquals(3, de.nextPosition()); assertEquals(new BytesRef("test"), de.getPayload()); writer.close(); reader.close(); dir.close(); }
80. TestPayloads#testMixupMultiValued()
Project: lucene-solr
File: TestPayloads.java
/** some field instances have payload att, some not */ public void testMixupMultiValued() throws Exception { Directory dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), dir); Document doc = new Document(); Field field = new TextField("field", "", Field.Store.NO); TokenStream ts = new MockTokenizer(MockTokenizer.WHITESPACE, true); ((Tokenizer) ts).setReader(new StringReader("here we go")); field.setTokenStream(ts); doc.add(field); Field field2 = new TextField("field", "", Field.Store.NO); Token withPayload = new Token("withPayload", 0, 11); withPayload.setPayload(new BytesRef("test")); ts = new CannedTokenStream(withPayload); assertTrue(ts.hasAttribute(PayloadAttribute.class)); field2.setTokenStream(ts); doc.add(field2); Field field3 = new TextField("field", "", Field.Store.NO); ts = new MockTokenizer(MockTokenizer.WHITESPACE, true); ((Tokenizer) ts).setReader(new StringReader("nopayload")); field3.setTokenStream(ts); doc.add(field3); writer.addDocument(doc); DirectoryReader reader = writer.getReader(); LeafReader sr = getOnlyLeafReader(reader); PostingsEnum de = sr.postings(new Term("field", "withPayload"), PostingsEnum.PAYLOADS); de.nextDoc(); de.nextPosition(); assertEquals(new BytesRef("test"), de.getPayload()); writer.close(); reader.close(); dir.close(); }
81. TestSloppyPhraseQuery#testSlopWithHoles()
Project: lucene-solr
File: TestSloppyPhraseQuery.java
// LUCENE-3215 public void testSlopWithHoles() throws Exception { Directory dir = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), dir); FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); customType.setOmitNorms(true); Field f = new Field("lyrics", "", customType); Document doc = new Document(); doc.add(f); f.setStringValue("drug drug"); iw.addDocument(doc); f.setStringValue("drug druggy drug"); iw.addDocument(doc); f.setStringValue("drug druggy druggy drug"); iw.addDocument(doc); f.setStringValue("drug druggy drug druggy drug"); iw.addDocument(doc); IndexReader ir = iw.getReader(); iw.close(); IndexSearcher is = newSearcher(ir); PhraseQuery.Builder builder = new PhraseQuery.Builder(); builder.add(new Term("lyrics", "drug"), 1); builder.add(new Term("lyrics", "drug"), 4); PhraseQuery pq = builder.build(); // "drug the drug"~1 assertEquals(1, is.search(pq, 4).totalHits); builder.setSlop(1); pq = builder.build(); assertEquals(3, is.search(pq, 4).totalHits); builder.setSlop(2); pq = builder.build(); assertEquals(4, is.search(pq, 4).totalHits); ir.close(); dir.close(); }
82. TestOmitNorms#testOmitNormsCombos()
Project: lucene-solr
File: TestOmitNorms.java
/** * Tests various combinations of omitNorms=true/false, the field not existing at all, * ensuring that only omitNorms is 'viral'. * Internally checks that MultiNorms.norms() is consistent (returns the same bytes) * as the fully merged equivalent. */ public void testOmitNormsCombos() throws IOException { // indexed with norms FieldType customType = new FieldType(TextField.TYPE_STORED); Field norms = new Field("foo", "a", customType); // indexed without norms FieldType customType1 = new FieldType(TextField.TYPE_STORED); customType1.setOmitNorms(true); Field noNorms = new Field("foo", "a", customType1); // not indexed, but stored FieldType customType2 = new FieldType(); customType2.setStored(true); Field noIndex = new Field("foo", "a", customType2); // not indexed but stored, omitNorms is set FieldType customType3 = new FieldType(); customType3.setStored(true); customType3.setOmitNorms(true); Field noNormsNoIndex = new Field("foo", "a", customType3); // not indexed nor stored (doesnt exist at all, we index a different field instead) Field emptyNorms = new Field("bar", "a", customType); assertNotNull(getNorms("foo", norms, norms)); assertNull(getNorms("foo", norms, noNorms)); assertNotNull(getNorms("foo", norms, noIndex)); assertNotNull(getNorms("foo", norms, noNormsNoIndex)); assertNotNull(getNorms("foo", norms, emptyNorms)); assertNull(getNorms("foo", noNorms, noNorms)); assertNull(getNorms("foo", noNorms, noIndex)); assertNull(getNorms("foo", noNorms, noNormsNoIndex)); assertNull(getNorms("foo", noNorms, emptyNorms)); assertNull(getNorms("foo", noIndex, noIndex)); assertNull(getNorms("foo", noIndex, noNormsNoIndex)); assertNull(getNorms("foo", noIndex, emptyNorms)); assertNull(getNorms("foo", noNormsNoIndex, noNormsNoIndex)); assertNull(getNorms("foo", noNormsNoIndex, emptyNorms)); assertNull(getNorms("foo", emptyNorms, emptyNorms)); }
83. TestIndexWriter#testWickedLongTerm()
Project: lucene-solr
File: TestIndexWriter.java
/**
 * Make sure we skip wicked long terms.
 * Part 1: a term longer than MAX_TERM_LENGTH_UTF8 must reject the whole
 * document. Part 2: a term of exactly the maximum length must index fine.
 */
public void testWickedLongTerm() throws IOException {
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, new StringSplitAnalyzer());
  char[] chars = new char[DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8];
  Arrays.fill(chars, 'x');
  Document hugeDoc = new Document();
  final String bigTerm = new String(chars);
  // This contents produces a too-long term:
  String contents = "abc xyz x" + bigTerm + " another term";
  hugeDoc.add(new TextField("content", contents, Field.Store.NO));
  expectThrows(IllegalArgumentException.class, () -> {
    w.addDocument(hugeDoc);
  });
  // Make sure we can add another normal document
  Document doc = new Document();
  doc.add(new TextField("content", "abc bbb ccc", Field.Store.NO));
  w.addDocument(doc);
  // So we remove the deleted doc:
  w.forceMerge(1);
  IndexReader reader = w.getReader();
  w.close();
  // Make sure all terms < max size were indexed
  assertEquals(1, reader.docFreq(new Term("content", "abc")));
  assertEquals(1, reader.docFreq(new Term("content", "bbb")));
  assertEquals(0, reader.docFreq(new Term("content", "term")));
  // Make sure the doc that has the massive term is NOT in
  // the index:
  assertEquals("document with wicked long term is in the index!", 1, reader.numDocs());
  reader.close();
  dir.close();
  dir = newDirectory();
  // Make sure we can add a document with exactly the
  // maximum length term, and search on that term:
  doc = new Document();
  // untokenized, so the whole value becomes a single (max-length) term
  FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
  customType.setTokenized(false);
  Field contentField = new Field("content", "", customType);
  doc.add(contentField);
  IndexWriterConfig iwc = newIndexWriterConfig();
  iwc.setCodec(TestUtil.getDefaultCodec());
  RandomIndexWriter w2 = new RandomIndexWriter(random(), dir, iwc);
  contentField.setStringValue("other");
  w2.addDocument(doc);
  contentField.setStringValue("term");
  w2.addDocument(doc);
  contentField.setStringValue(bigTerm);
  w2.addDocument(doc);
  contentField.setStringValue("zzz");
  w2.addDocument(doc);
  reader = w2.getReader();
  w2.close();
  assertEquals(1, reader.docFreq(new Term("content", bigTerm)));
  reader.close();
  dir.close();
}
84. TestPrefixInBooleanQuery#beforeClass()
Project: lucene-solr
File: TestPrefixInBooleanQuery.java
@BeforeClass public static void beforeClass() throws Exception { directory = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), directory); Document doc = new Document(); Field field = newStringField(FIELD, "meaninglessnames", Field.Store.NO); doc.add(field); for (int i = 0; i < 5137; ++i) { writer.addDocument(doc); } field.setStringValue("tangfulin"); writer.addDocument(doc); field.setStringValue("meaninglessnames"); for (int i = 5138; i < 11377; ++i) { writer.addDocument(doc); } field.setStringValue("tangfulin"); writer.addDocument(doc); reader = writer.getReader(); searcher = newSearcher(reader); writer.close(); }
85. TestPayloadsOnVectors#testMixupDocs()
Project: lucene-solr
File: TestPayloadsOnVectors.java
/** some docs have payload att, some not */ public void testMixupDocs() throws Exception { Directory dir = newDirectory(); IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random())); iwc.setMergePolicy(newLogMergePolicy()); RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc); Document doc = new Document(); FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); customType.setStoreTermVectors(true); customType.setStoreTermVectorPositions(true); customType.setStoreTermVectorPayloads(true); customType.setStoreTermVectorOffsets(random().nextBoolean()); Field field = new Field("field", "", customType); TokenStream ts = new MockTokenizer(MockTokenizer.WHITESPACE, true); ((Tokenizer) ts).setReader(new StringReader("here we go")); field.setTokenStream(ts); doc.add(field); writer.addDocument(doc); Token withPayload = new Token("withPayload", 0, 11); withPayload.setPayload(new BytesRef("test")); ts = new CannedTokenStream(withPayload); assertTrue(ts.hasAttribute(PayloadAttribute.class)); field.setTokenStream(ts); writer.addDocument(doc); ts = new MockTokenizer(MockTokenizer.WHITESPACE, true); ((Tokenizer) ts).setReader(new StringReader("another")); field.setTokenStream(ts); writer.addDocument(doc); DirectoryReader reader = writer.getReader(); Terms terms = reader.getTermVector(1, "field"); assert terms != null; TermsEnum termsEnum = terms.iterator(); assertTrue(termsEnum.seekExact(new BytesRef("withPayload"))); PostingsEnum de = termsEnum.postings(null, PostingsEnum.ALL); assertEquals(0, de.nextDoc()); assertEquals(0, de.nextPosition()); assertEquals(new BytesRef("test"), de.getPayload()); writer.close(); reader.close(); dir.close(); }
86. TestPayloads#testMixupDocs()
Project: lucene-solr
File: TestPayloads.java
/** some docs have payload att, some not */ public void testMixupDocs() throws Exception { Directory dir = newDirectory(); IndexWriterConfig iwc = newIndexWriterConfig(null); iwc.setMergePolicy(newLogMergePolicy()); RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc); Document doc = new Document(); Field field = new TextField("field", "", Field.Store.NO); TokenStream ts = new MockTokenizer(MockTokenizer.WHITESPACE, true); ((Tokenizer) ts).setReader(new StringReader("here we go")); field.setTokenStream(ts); doc.add(field); writer.addDocument(doc); Token withPayload = new Token("withPayload", 0, 11); withPayload.setPayload(new BytesRef("test")); ts = new CannedTokenStream(withPayload); assertTrue(ts.hasAttribute(PayloadAttribute.class)); field.setTokenStream(ts); writer.addDocument(doc); ts = new MockTokenizer(MockTokenizer.WHITESPACE, true); ((Tokenizer) ts).setReader(new StringReader("another")); field.setTokenStream(ts); writer.addDocument(doc); DirectoryReader reader = writer.getReader(); TermsEnum te = MultiFields.getFields(reader).terms("field").iterator(); assertTrue(te.seekExact(new BytesRef("withPayload"))); PostingsEnum de = te.postings(null, PostingsEnum.PAYLOADS); de.nextDoc(); de.nextPosition(); assertEquals(new BytesRef("test"), de.getPayload()); writer.close(); reader.close(); dir.close(); }
87. FieldBuilderTest#testCreateFieldIndexStoredCompress()
Project: uima-addons
File: FieldBuilderTest.java
@Test public void testCreateFieldIndexStoredCompress() throws Exception { fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES); fieldDescription.setStored(FieldBuilder.FIELD_STORE_COMPRESS); tokenStreams.remove(1); expect(filterBuilder.filter(isA(TokenStream.class), isA(Collection.class))).andReturn(tokenStream1); replay(filterBuilder); Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription); verify(filterBuilder); assertEquals(4, fields.size()); Iterator<Field> fieldIterator = fields.iterator(); Field field1 = fieldIterator.next(); assertEquals("field1", field1.name()); assertFalse(field1.isIndexed()); assertTrue(field1.isStored()); assertTrue(field1.isCompressed()); assertEquals("token1", field1.stringValue()); Field field2 = fieldIterator.next(); assertEquals("field1", field2.name()); assertFalse(field2.isIndexed()); assertTrue(field2.isStored()); assertTrue(field2.isCompressed()); assertEquals("token2", field2.stringValue()); Field field3 = fieldIterator.next(); assertEquals("field1", field3.name()); assertFalse(field3.isIndexed()); assertTrue(field3.isStored()); assertTrue(field3.isCompressed()); assertEquals("token3", field3.stringValue()); Field field4 = fieldIterator.next(); assertEquals("field1", field4.name()); assertTrue(field4.isIndexed()); assertFalse(field4.isStored()); }
88. FieldBuilderTest#testCreateFieldIndexStored()
Project: uima-addons
File: FieldBuilderTest.java
@Test public void testCreateFieldIndexStored() throws Exception { fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES); fieldDescription.setStored(FieldBuilder.FIELD_STORE_YES); tokenStreams.remove(1); expect(filterBuilder.filter(isA(TokenStream.class), isA(Collection.class))).andReturn(tokenStream1); replay(filterBuilder); Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription); verify(filterBuilder); assertEquals(4, fields.size()); Iterator<Field> fieldIterator = fields.iterator(); Field field1 = fieldIterator.next(); assertEquals("field1", field1.name()); assertFalse(field1.isIndexed()); assertTrue(field1.isStored()); assertEquals("token1", field1.stringValue()); Field field2 = fieldIterator.next(); assertEquals("field1", field2.name()); assertFalse(field2.isIndexed()); assertTrue(field2.isStored()); assertEquals("token2", field2.stringValue()); Field field3 = fieldIterator.next(); assertEquals("field1", field3.name()); assertFalse(field3.isIndexed()); assertTrue(field3.isStored()); assertEquals("token3", field3.stringValue()); Field field4 = fieldIterator.next(); assertEquals("field1", field4.name()); assertTrue(field4.isIndexed()); assertFalse(field4.isStored()); }
89. CustomPostingsHighlighterTests#testNoMatchSize()
Project: elasticsearch
File: CustomPostingsHighlighterTests.java
public void testNoMatchSize() throws Exception { Directory dir = newDirectory(); IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random())); iwc.setMergePolicy(newLogMergePolicy()); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc); FieldType offsetsType = new FieldType(TextField.TYPE_STORED); offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); Field body = new Field("body", "", offsetsType); Field none = new Field("none", "", offsetsType); Document doc = new Document(); doc.add(body); doc.add(none); String firstValue = "This is a test. Just a test highlighting from postings. Feel free to ignore."; body.setStringValue(firstValue); none.setStringValue(firstValue); iw.addDocument(doc); IndexReader ir = iw.getReader(); iw.close(); Query query = new TermQuery(new Term("none", "highlighting")); IndexSearcher searcher = newSearcher(ir); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertThat(topDocs.totalHits, equalTo(1)); int docId = topDocs.scoreDocs[0].doc; CustomPassageFormatter passageFormatter = new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder()); CustomPostingsHighlighter highlighter = new CustomPostingsHighlighter(null, passageFormatter, firstValue, false); Snippet[] snippets = highlighter.highlightField("body", query, searcher, docId, 5); assertThat(snippets.length, equalTo(0)); highlighter = new CustomPostingsHighlighter(null, passageFormatter, firstValue, true); snippets = highlighter.highlightField("body", query, searcher, docId, 5); assertThat(snippets.length, equalTo(1)); assertThat(snippets[0].getText(), equalTo("This is a test.")); ir.close(); dir.close(); }
90. TestDocTermOrds#testSimple()
Project: lucene-solr
File: TestDocTermOrds.java
public void testSimple() throws Exception { Directory dir = newDirectory(); final RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy())); Document doc = new Document(); Field field = newTextField("field", "", Field.Store.NO); doc.add(field); field.setStringValue("a b c"); w.addDocument(doc); field.setStringValue("d e f"); w.addDocument(doc); field.setStringValue("a f"); w.addDocument(doc); final IndexReader r = w.getReader(); w.close(); final LeafReader ar = SlowCompositeReaderWrapper.wrap(r); TestUtil.checkReader(ar); final DocTermOrds dto = new DocTermOrds(ar, ar.getLiveDocs(), "field"); SortedSetDocValues iter = dto.iterator(ar); iter.setDocument(0); assertEquals(0, iter.nextOrd()); assertEquals(1, iter.nextOrd()); assertEquals(2, iter.nextOrd()); assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd()); iter.setDocument(1); assertEquals(3, iter.nextOrd()); assertEquals(4, iter.nextOrd()); assertEquals(5, iter.nextOrd()); assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd()); iter.setDocument(2); assertEquals(0, iter.nextOrd()); assertEquals(5, iter.nextOrd()); assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd()); r.close(); dir.close(); }
91. TestPostingsHighlighter#testGapSeparator()
Project: lucene-solr
File: TestPostingsHighlighter.java
/** customizing the gap separator to force a sentence break */ public void testGapSeparator() throws Exception { Directory dir = newDirectory(); // use simpleanalyzer for more natural tokenization (else "test." is a token) IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true)); iwc.setMergePolicy(newLogMergePolicy()); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc); FieldType offsetsType = new FieldType(TextField.TYPE_STORED); offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); Document doc = new Document(); Field body1 = new Field("body", "", offsetsType); body1.setStringValue("This is a multivalued field"); doc.add(body1); Field body2 = new Field("body", "", offsetsType); body2.setStringValue("This is something different"); doc.add(body2); iw.addDocument(doc); IndexReader ir = iw.getReader(); iw.close(); IndexSearcher searcher = newSearcher(ir); PostingsHighlighter highlighter = new PostingsHighlighter() { @Override protected char getMultiValuedSeparator(String field) { assert field.equals("body"); return '?'; } }; Query query = new TermQuery(new Term("body", "field")); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(1, topDocs.totalHits); String snippets[] = highlighter.highlight("body", query, searcher, topDocs); assertEquals(1, snippets.length); assertEquals("This is a multivalued <b>field</b>?", snippets[0]); ir.close(); dir.close(); }
92. TestPostingsHighlighter#testMultipleSnippetSizes()
Project: lucene-solr
File: TestPostingsHighlighter.java
public void testMultipleSnippetSizes() throws Exception { Directory dir = newDirectory(); IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true)); iwc.setMergePolicy(newLogMergePolicy()); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc); FieldType offsetsType = new FieldType(TextField.TYPE_STORED); offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); Field body = new Field("body", "", offsetsType); Field title = new Field("title", "", offsetsType); Document doc = new Document(); doc.add(body); doc.add(title); body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore."); title.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore."); iw.addDocument(doc); IndexReader ir = iw.getReader(); iw.close(); IndexSearcher searcher = newSearcher(ir); PostingsHighlighter highlighter = new PostingsHighlighter(); BooleanQuery.Builder query = new BooleanQuery.Builder(); query.add(new TermQuery(new Term("body", "test")), BooleanClause.Occur.SHOULD); query.add(new TermQuery(new Term("title", "test")), BooleanClause.Occur.SHOULD); Map<String, String[]> snippets = highlighter.highlightFields(new String[] { "title", "body" }, query.build(), searcher, new int[] { 0 }, new int[] { 1, 2 }); String titleHighlight = snippets.get("title")[0]; String bodyHighlight = snippets.get("body")[0]; assertEquals("This is a <b>test</b>. ", titleHighlight); assertEquals("This is a <b>test</b>. Just a <b>test</b> highlighting from postings. ", bodyHighlight); ir.close(); dir.close(); }
93. AlfrescoDataType#createField()
Project: community-edition
File: AlfrescoDataType.java
public Field createField(SchemaField field, String externalVal, float boost) { String val; try { val = toInternal(externalVal); } catch (RuntimeException e) { throw e; } if (val == null) return null; if (!field.indexed() && !field.stored()) { return null; } Field f = new Field(field.getName(), val, getFieldStore(field, val), getFieldIndex(field, val), getFieldTermVec(field, val)); f.setOmitNorms(getOmitNorms(field, val)); f.setOmitTermFreqAndPositions(field.omitTf()); // f.setBoost(boost); return f; }
94. TrainMoreLikeThis#buildTfidfIndex()
Project: book
File: TrainMoreLikeThis.java
/** builds a lucene index suitable for tfidf based classification. Each categories content is indexed into * a single document in the index, and the best match for a MoreLikeThis query is the category that * is assigned. * @param inputFiles * @param writer * @throws Exception */ protected void buildTfidfIndex(File[] inputFiles) throws Exception { int lineCount = 0; int fileCount = 0; String line = null; Set<String> categories = new HashSet<String>(); long start = System.currentTimeMillis(); // reuse these fields Field id = new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.NO); Field categoryField = new Field("category", "", Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.NO); Field contentField = new Field("content", "", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); for (File ff : inputFiles) { fileCount++; lineCount = 0; // read all training documents into a string BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(ff), "UTF-8")); //<start id="lucene.examples.tfidf.train"/> StringBuilder content = new StringBuilder(); String category = null; while ((line = in.readLine()) != null) { //<co id="luc.tf.content"/> String[] parts = line.split("\t"); if (parts.length != 2) continue; category = parts[0]; categories.add(category); content.append(parts[1]).append(" "); lineCount++; } in.close(); //<co id="luc.tf.document"/> Document d = new Document(); id.setValue(category + "-" + lineCount); categoryField.setValue(category); contentField.setValue(content.toString()); d.add(id); d.add(categoryField); d.add(contentField); //<co id="luc.tf.index"/> writer.addDocument(d); /*<calloutlist> <callout arearefs="luc.tf.content">Collect Content</callout> <callout arearefs="luc.tf.document">Build Document</callout> <callout arearefs="luc.tf.index">Index Document</callout> </calloutlist>*/ //<end id="lucene.examples.tfidf.train"/> log.info("TfIdf: Added document for category " + 
category + " with " + lineCount + " lines"); } writer.commit(generateUserData(categories)); log.info("TfIdf: Added " + fileCount + " categories in " + (System.currentTimeMillis() - start) + " msec."); }
95. TrainMoreLikeThis#buildKnnIndex()
Project: book
File: TrainMoreLikeThis.java
/** builda a lucene index suidable for knn based classification. Each category's content is indexed into * separate documents in the index, and the category that has the haghest count in the tip N hits is * is the category that is assigned. * @param inputFiles * @param writer * @throws Exception */ protected void buildKnnIndex(File[] inputFiles) throws Exception { int lineCount = 0; int fileCount = 0; String line = null; String category = null; Set<String> categories = new HashSet<String>(); long start = System.currentTimeMillis(); // reuse these fields //<start id="lucene.examples.fields"/> Field id = new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.NO); Field categoryField = new Field("category", "", Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.NO); Field contentField = new Field("content", "", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); for (File ff : inputFiles) { fileCount++; lineCount = 0; category = null; BufferedReader in = new BufferedReader(new FileReader(ff)); //<start id="lucene.examples.knn.train"/> while ((line = in.readLine()) != null) { //<co id="luc.knn.content"/> String[] parts = line.split("\t"); if (parts.length != 2) continue; category = parts[0]; categories.add(category); //<co id="luc.knn.document"/> Document d = new Document(); id.setValue(category + "-" + lineCount++); categoryField.setValue(category); contentField.setValue(parts[1]); d.add(id); d.add(categoryField); d.add(contentField); //<co id="luc.knn.index"/> writer.addDocument(d); } /*<calloutlist> <callout arearefs="luc.knn.content">Collect Content</callout> <callout arearefs="luc.knn.document">Build Document</callout> <callout arearefs="luc.knn.index">Index Document</callout> </calloutlist>*/ //<end id="lucene.examples.knn.train"/> in.close(); log.info("Knn: Added document for category " + category + " with " + lineCount + " lines"); } writer.commit(generateUserData(categories)); log.info("Knn: Added " 
+ fileCount + " categories in " + (System.currentTimeMillis() - start) + " msec."); }
96. TestFieldCacheVsDocValues#doTestMissingVsFieldCache()
Project: lucene-solr
File: TestFieldCacheVsDocValues.java
private void doTestMissingVsFieldCache(LongProducer longs) throws Exception { Directory dir = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random())); RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf); Field idField = new StringField("id", "", Field.Store.NO); Field indexedField = newStringField("indexed", "", Field.Store.NO); Field dvField = new NumericDocValuesField("dv", 0); // index some docs int numDocs = atLeast(300); // for numbers of values <= 256, all storage layouts are tested assert numDocs > 256; for (int i = 0; i < numDocs; i++) { idField.setStringValue(Integer.toString(i)); long value = longs.next(); indexedField.setStringValue(Long.toString(value)); dvField.setLongValue(value); Document doc = new Document(); doc.add(idField); // 1/4 of the time we neglect to add the fields if (random().nextInt(4) > 0) { doc.add(indexedField); doc.add(dvField); } writer.addDocument(doc); if (random().nextInt(31) == 0) { writer.commit(); } } // delete some docs int numDeletions = random().nextInt(numDocs / 10); for (int i = 0; i < numDeletions; i++) { int id = random().nextInt(numDocs); writer.deleteDocuments(new Term("id", Integer.toString(id))); } // merge some segments and ensure that at least one of them has more than // 256 values writer.forceMerge(numDocs / 256); writer.close(); // compare DirectoryReader ir = DirectoryReader.open(dir); for (LeafReaderContext context : ir.leaves()) { LeafReader r = context.reader(); Bits expected = FieldCache.DEFAULT.getDocsWithField(r, "indexed", null); Bits actual = FieldCache.DEFAULT.getDocsWithField(r, "dv", null); assertEquals(expected, actual); } ir.close(); dir.close(); }
97. TestFieldCacheVsDocValues#doTestSortedVsFieldCache()
Project: lucene-solr
File: TestFieldCacheVsDocValues.java
private void doTestSortedVsFieldCache(int minLength, int maxLength) throws Exception { Directory dir = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random())); RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf); Document doc = new Document(); Field idField = new StringField("id", "", Field.Store.NO); Field indexedField = new StringField("indexed", "", Field.Store.NO); Field dvField = new SortedDocValuesField("dv", new BytesRef()); doc.add(idField); doc.add(indexedField); doc.add(dvField); // index some docs int numDocs = atLeast(300); for (int i = 0; i < numDocs; i++) { idField.setStringValue(Integer.toString(i)); final int length; if (minLength == maxLength) { // fixed length length = minLength; } else { length = TestUtil.nextInt(random(), minLength, maxLength); } String value = TestUtil.randomSimpleString(random(), length); indexedField.setStringValue(value); dvField.setBytesValue(new BytesRef(value)); writer.addDocument(doc); if (random().nextInt(31) == 0) { writer.commit(); } } // delete some docs int numDeletions = random().nextInt(numDocs / 10); for (int i = 0; i < numDeletions; i++) { int id = random().nextInt(numDocs); writer.deleteDocuments(new Term("id", Integer.toString(id))); } writer.close(); // compare DirectoryReader ir = DirectoryReader.open(dir); for (LeafReaderContext context : ir.leaves()) { LeafReader r = context.reader(); SortedDocValues expected = FieldCache.DEFAULT.getTermsIndex(r, "indexed"); SortedDocValues actual = r.getSortedDocValues("dv"); assertEquals(r.maxDoc(), expected, actual); } ir.close(); dir.close(); }
98. BaseStoredFieldsFormatTestCase#testBigDocuments()
Project: lucene-solr
File: BaseStoredFieldsFormatTestCase.java
/**
 * Stresses the stored-fields format with documents much bigger than the
 * chunk size: an empty doc, a doc with very many small stored fields, and a
 * doc with one multi-megabyte stored field, randomly mixed, committed,
 * force-merged, and finally read back and verified against the originals.
 */
@Nightly
public void testBigDocuments() throws IOException {
  // "big" as "much bigger than the chunk size"
  // for this test we force a FS dir
  // we can't just use newFSDirectory, because this test doesn't really index anything.
  // so if we get NRTCachingDir+SimpleText, we make massive stored fields and OOM (LUCENE-4484)
  Directory dir = new MockDirectoryWrapper(random(), new MMapDirectory(createTempDir("testBigDocuments")));
  IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random()));
  iwConf.setMaxBufferedDocs(RandomInts.randomIntBetween(random(), 2, 30));
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);
  if (dir instanceof MockDirectoryWrapper) {
    // throttling would make the huge writes below unbearably slow
    ((MockDirectoryWrapper) dir).setThrottling(Throttling.NEVER);
  }
  // emptyDoc
  final Document emptyDoc = new Document();
  // lot of small fields
  final Document bigDoc1 = new Document();
  // 1 very big field
  final Document bigDoc2 = new Document();
  // the id field is shared by all three document shapes
  final Field idField = new StringField("id", "", Store.NO);
  emptyDoc.add(idField);
  bigDoc1.add(idField);
  bigDoc2.add(idField);
  final FieldType onlyStored = new FieldType(StringField.TYPE_STORED);
  onlyStored.setIndexOptions(IndexOptions.NONE);
  final Field smallField = new Field("fld", randomByteArray(random().nextInt(10), 256), onlyStored);
  final int numFields = RandomInts.randomIntBetween(random(), 500000, 1000000);
  for (int i = 0; i < numFields; ++i) {
    bigDoc1.add(smallField);
  }
  final Field bigField = new Field("fld", randomByteArray(RandomInts.randomIntBetween(random(), 1000000, 5000000), 2), onlyStored);
  bigDoc2.add(bigField);
  final int numDocs = atLeast(5);
  final Document[] docs = new Document[numDocs];
  for (int i = 0; i < numDocs; ++i) {
    docs[i] = RandomPicks.randomFrom(random(), Arrays.asList(emptyDoc, bigDoc1, bigDoc2));
  }
  for (int i = 0; i < numDocs; ++i) {
    idField.setStringValue("" + i);
    iw.addDocument(docs[i]);
    // occasional commits so docs land in different segments
    if (random().nextInt(numDocs) == 0) {
      iw.commit();
    }
  }
  iw.commit();
  // look at what happens when big docs are merged
  iw.forceMerge(1);
  final DirectoryReader rd = DirectoryReader.open(dir);
  final IndexSearcher searcher = new IndexSearcher(rd);
  // Every doc must be findable by id and its stored "fld" values intact.
  for (int i = 0; i < numDocs; ++i) {
    final Query query = new TermQuery(new Term("id", "" + i));
    final TopDocs topDocs = searcher.search(query, 1);
    assertEquals("" + i, 1, topDocs.totalHits);
    final Document doc = rd.document(topDocs.scoreDocs[0].doc);
    assertNotNull(doc);
    final IndexableField[] fieldValues = doc.getFields("fld");
    assertEquals(docs[i].getFields("fld").length, fieldValues.length);
    if (fieldValues.length > 0) {
      assertEquals(docs[i].getFields("fld")[0].binaryValue(), fieldValues[0].binaryValue());
    }
  }
  rd.close();
  iw.close();
  dir.close();
}
99. BaseNormsFormatTestCase#doTestNormsVersusDocValues()
Project: lucene-solr
File: BaseNormsFormatTestCase.java
/**
 * Indexes random long values twice — once as real norms (via a canned
 * Similarity) and once as numeric doc values — then verifies the two
 * encodings round-trip identically, both before and after forceMerge.
 *
 * @param longs produces one norm value per document
 */
private void doTestNormsVersusDocValues(LongProducer longs) throws Exception {
  int numDocs = atLeast(500);
  long norms[] = new long[numDocs];
  for (int i = 0; i < numDocs; i++) {
    norms[i] = longs.next();
  }
  Directory dir = newDirectory();
  // KEYWORD tokenizer: each value is one token, so the canned similarity
  // fully determines the per-document norm.
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
  IndexWriterConfig conf = newIndexWriterConfig(analyzer);
  conf.setSimilarity(new CannedNormSimilarity(norms));
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
  Document doc = new Document();
  Field idField = new StringField("id", "", Field.Store.NO);
  Field indexedField = new TextField("indexed", "", Field.Store.NO);
  Field dvField = new NumericDocValuesField("dv", 0);
  doc.add(idField);
  doc.add(indexedField);
  doc.add(dvField);
  for (int i = 0; i < numDocs; i++) {
    idField.setStringValue(Integer.toString(i));
    long value = norms[i];
    dvField.setLongValue(value);
    indexedField.setStringValue(Long.toString(value));
    writer.addDocument(doc);
    // occasional commits produce multiple segments
    if (random().nextInt(31) == 0) {
      writer.commit();
    }
  }
  // delete some docs
  int numDeletions = random().nextInt(numDocs / 20);
  for (int i = 0; i < numDeletions; i++) {
    int id = random().nextInt(numDocs);
    writer.deleteDocuments(new Term("id", Integer.toString(id)));
  }
  writer.commit();
  // compare (the verification loop was duplicated inline; now a helper)
  DirectoryReader ir = DirectoryReader.open(dir);
  assertNormsMatchDocValues(ir);
  ir.close();
  writer.forceMerge(1);
  // compare again, after merging down to a single segment
  ir = DirectoryReader.open(dir);
  assertNormsMatchDocValues(ir);
  writer.close();
  ir.close();
  dir.close();
}

/** Asserts that, per leaf and per doc, the norm of "indexed" equals the "dv" doc value. */
private void assertNormsMatchDocValues(DirectoryReader ir) throws Exception {
  for (LeafReaderContext context : ir.leaves()) {
    LeafReader r = context.reader();
    NumericDocValues expected = r.getNumericDocValues("dv");
    NumericDocValues actual = r.getNormValues("indexed");
    for (int i = 0; i < r.maxDoc(); i++) {
      assertEquals("doc " + i, expected.get(i), actual.get(i));
    }
  }
}
100. BaseDocValuesFormatTestCase#doTestSortedVsStoredFields()
Project: lucene-solr
File: BaseDocValuesFormatTestCase.java
/**
 * Indexes random byte buffers both as a stored field and as SortedDocValues,
 * then verifies that for every live document the doc value round-trips to
 * the same bytes as the stored field — before and after forceMerge.
 *
 * @param numDocs number of documents to index
 * @param minLength minimum buffer length (equal to maxLength for fixed-length values)
 * @param maxLength maximum buffer length
 */
protected void doTestSortedVsStoredFields(int numDocs, int minLength, int maxLength) throws Exception {
  Directory dir = newFSDirectory(createTempDir("dvduel"));
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
  // One reused Document/Field set; values are swapped per iteration.
  Document doc = new Document();
  Field idField = new StringField("id", "", Field.Store.NO);
  Field storedField = new StoredField("stored", new byte[0]);
  Field dvField = new SortedDocValuesField("dv", new BytesRef());
  doc.add(idField);
  doc.add(storedField);
  doc.add(dvField);
  // index some docs
  for (int i = 0; i < numDocs; i++) {
    idField.setStringValue(Integer.toString(i));
    final int length;
    if (minLength == maxLength) {
      // fixed length
      length = minLength;
    } else {
      length = TestUtil.nextInt(random(), minLength, maxLength);
    }
    byte buffer[] = new byte[length];
    random().nextBytes(buffer);
    // identical bytes go into both representations
    storedField.setBytesValue(buffer);
    dvField.setBytesValue(new BytesRef(buffer));
    writer.addDocument(doc);
    // occasional commits produce multiple segments
    if (random().nextInt(31) == 0) {
      writer.commit();
    }
  }
  // delete some docs
  int numDeletions = random().nextInt(numDocs / 10);
  for (int i = 0; i < numDeletions; i++) {
    int id = random().nextInt(numDocs);
    writer.deleteDocuments(new Term("id", Integer.toString(id)));
  }
  // compare
  DirectoryReader ir = writer.getReader();
  for (LeafReaderContext context : ir.leaves()) {
    LeafReader r = context.reader();
    BinaryDocValues docValues = r.getSortedDocValues("dv");
    for (int i = 0; i < r.maxDoc(); i++) {
      BytesRef binaryValue = r.document(i).getBinaryValue("stored");
      BytesRef scratch = docValues.get(i);
      assertEquals(binaryValue, scratch);
    }
  }
  ir.close();
  writer.forceMerge(1);
  // compare again
  ir = writer.getReader();
  for (LeafReaderContext context : ir.leaves()) {
    LeafReader r = context.reader();
    BinaryDocValues docValues = r.getSortedDocValues("dv");
    for (int i = 0; i < r.maxDoc(); i++) {
      BytesRef binaryValue = r.document(i).getBinaryValue("stored");
      BytesRef scratch = docValues.get(i);
      assertEquals(binaryValue, scratch);
    }
  }
  ir.close();
  writer.close();
  dir.close();
}