---
# Pipeline configuration: declares the reusable components first, then wires
# them into two pipelines ("query" and "indexing") by component name.
version: '0.7'

components:  # define all the building-blocks for Pipeline
  - name: FAISSDocumentStore
    type: FAISSDocumentStore
    params:
      faiss_index_factory_str: Flat

  # NOTE(review): this store is not referenced by any component or pipeline
  # in this file as shown — confirm whether it is still needed.
  - name: ElasticsearchDocumentStore
    type: ElasticsearchDocumentStore
    params:
      host: localhost

  - name: DPRetriever
    type: DensePassageRetriever
    params:
      # params can reference other components defined in the YAML
      document_store: FAISSDocumentStore
      top_k: 5
      query_embedding_model: facebook/dpr-question_encoder-single-nq-base
      passage_embedding_model: facebook/dpr-ctx_encoder-single-nq-base
      max_seq_len_query: 64
      max_seq_len_passage: 256
      batch_size: 16
      use_gpu: true
      embed_title: true
      use_fast_tokenizers: true

  # custom-name for the component; helpful for visualization & debugging
  - name: Reader
    type: FARMReader  # Haystack Class name for the component
    params:
      model_name_or_path: deepset/roberta-base-squad2

  - name: TextFileConverter
    type: TextConverter

  - name: PDFFileConverter
    type: PDFToTextConverter

  - name: Preprocessor
    type: PreProcessor
    params:
      clean_empty_lines: true
      clean_whitespace: true
      clean_header_footer: false
      split_by: word
      split_length: 350
      split_respect_sentence_boundary: true

  - name: FileTypeClassifier
    type: FileTypeClassifier

pipelines:
  - name: query  # a sample extractive-qa Pipeline
    type: Query
    nodes:
      - name: DPRetriever
        inputs: [Query]
      - name: Reader
        inputs: [DPRetriever]

  - name: indexing
    type: Indexing
    nodes:
      - name: FileTypeClassifier
        inputs: [File]
      # The classifier's output_1 feeds the text converter and output_2 feeds
      # the PDF converter; both converters then join at the Preprocessor.
      - name: TextFileConverter
        inputs: [FileTypeClassifier.output_1]
      - name: PDFFileConverter
        inputs: [FileTypeClassifier.output_2]
      - name: Preprocessor
        inputs: [PDFFileConverter, TextFileConverter]
      # NOTE(review): no retriever node runs in this pipeline, so documents
      # are presumably written without embeddings — confirm that embeddings
      # for the FAISS index are computed in a separate step.
      - name: FAISSDocumentStore
        inputs: [Preprocessor]