Spaces:

vespa-engine/colpali-vespa-visual-retrieval

Running on T4

File size: 7,716 Bytes

# Schema for a single PDF page: text metadata, raw image payloads and a
# multi-vector int8 patch embedding, together with two rank profiles.
schema pdf_page {

    document pdf_page {

        # Page identifier; indexed using match mode `word`.
        field id type string {
            indexing: summary | index
            match: word
        }

        # Source URL of the document; indexed with default match settings.
        field url type string {
            indexing: summary | index
        }

        # Title, text-matched and BM25-enabled (used by bm25_score in the
        # `default` rank profile).
        field title type string {
            indexing: summary | index
            index: enable-bm25
            match: text
        }

        # Page number, kept as an attribute and returned in summaries.
        field page_number type int {
            indexing: summary | attribute
        }

        # Raw binary payloads, returned in summaries only (not indexed).
        # NOTE(review): presumably a reduced and a full-resolution rendering
        # of the page -- confirm against the feeding code.
        field image type raw {
            indexing: summary
        }
        field full_image type raw {
            indexing: summary
        }

        # Page text, text-matched and BM25-enabled (used by bm25_score in the
        # `default` rank profile).
        field text type string {
            indexing: summary | index
            index: enable-bm25
            match: text
        }

        # One 16-cell int8 vector per `patch` label, compared with hamming
        # distance and backed by an HNSW index. The rank profiles expand it
        # with unpack_bits before multiplying with the v[128] float query
        # tensor, so each vector presumably packs 128 bits -- confirm against
        # the feeding code.
        field embedding type tensor<int8>(patch{}, v[16]) {
            indexing: attribute | index
            attribute {
                distance-metric: hamming
            }
            index {
                hnsw {
                    max-links-per-node: 32
                    neighbors-to-explore-at-insert: 400
                }
            }
        }
    }

    # Fields addressed together under the name `default`.
    # NOTE(review): page_number is an int attribute while the other members
    # are string index fields; confirm that mixing them here is intended.
    fieldset default {
        fields: title, url, page_number, text
    }

    # NOTE(review): `image` is a summary-only raw field; confirm that this
    # fieldset is actually used by queries.
    fieldset image {
        fields: image
    }

    # Text-first ranking: BM25 over title and text in the first phase, then
    # the top 10 hits are re-scored with max_sim in the second phase.
    # query(qt) holds one 128-cell float vector per `querytoken` label.
    rank-profile default {
        inputs {
            query(qt) tensor<float>(querytoken{}, v[128])
        }

        # For every query token: dot product (sum over v) with each unpacked
        # patch vector, maximum over patches; then summed over query tokens.
        function max_sim() {
            expression {
                sum(
                    reduce(
                        sum(query(qt) * unpack_bits(attribute(embedding)), v),
                        max, patch
                    ),
                    querytoken
                )
            }
        }

        # Sum of the BM25 scores of the title and text fields.
        function bm25_score() {
            expression: bm25(title) + bm25(text)
        }

        first-phase {
            expression: bm25_score
        }

        second-phase {
            expression: max_sim
            rerank-count: 10
        }
    }

    # Embedding-only ranking: a hamming-based score on the packed vectors in
    # the first phase, then the top 10 hits are re-scored with the float
    # max_sim in the second phase.
    #
    # query(qt)  : float query-token vectors, used by max_sim.
    # query(qtb) : int8 query-token vectors, used by max_sim_binary.
    # query(rq0) .. query(rq63) : 64 individual int8 vectors. They are not
    #   referenced by any expression in this profile; presumably they are the
    #   per-token query tensors for nearest-neighbor retrieval -- confirm
    #   against the query-building code.
    rank-profile retrieval-and-rerank {
        inputs {
            query(qt) tensor<float>(querytoken{}, v[128])
            query(qtb) tensor<int8>(querytoken{}, v[16])
            query(rq0) tensor<int8>(v[16])
            query(rq1) tensor<int8>(v[16])
            query(rq2) tensor<int8>(v[16])
            query(rq3) tensor<int8>(v[16])
            query(rq4) tensor<int8>(v[16])
            query(rq5) tensor<int8>(v[16])
            query(rq6) tensor<int8>(v[16])
            query(rq7) tensor<int8>(v[16])
            query(rq8) tensor<int8>(v[16])
            query(rq9) tensor<int8>(v[16])
            query(rq10) tensor<int8>(v[16])
            query(rq11) tensor<int8>(v[16])
            query(rq12) tensor<int8>(v[16])
            query(rq13) tensor<int8>(v[16])
            query(rq14) tensor<int8>(v[16])
            query(rq15) tensor<int8>(v[16])
            query(rq16) tensor<int8>(v[16])
            query(rq17) tensor<int8>(v[16])
            query(rq18) tensor<int8>(v[16])
            query(rq19) tensor<int8>(v[16])
            query(rq20) tensor<int8>(v[16])
            query(rq21) tensor<int8>(v[16])
            query(rq22) tensor<int8>(v[16])
            query(rq23) tensor<int8>(v[16])
            query(rq24) tensor<int8>(v[16])
            query(rq25) tensor<int8>(v[16])
            query(rq26) tensor<int8>(v[16])
            query(rq27) tensor<int8>(v[16])
            query(rq28) tensor<int8>(v[16])
            query(rq29) tensor<int8>(v[16])
            query(rq30) tensor<int8>(v[16])
            query(rq31) tensor<int8>(v[16])
            query(rq32) tensor<int8>(v[16])
            query(rq33) tensor<int8>(v[16])
            query(rq34) tensor<int8>(v[16])
            query(rq35) tensor<int8>(v[16])
            query(rq36) tensor<int8>(v[16])
            query(rq37) tensor<int8>(v[16])
            query(rq38) tensor<int8>(v[16])
            query(rq39) tensor<int8>(v[16])
            query(rq40) tensor<int8>(v[16])
            query(rq41) tensor<int8>(v[16])
            query(rq42) tensor<int8>(v[16])
            query(rq43) tensor<int8>(v[16])
            query(rq44) tensor<int8>(v[16])
            query(rq45) tensor<int8>(v[16])
            query(rq46) tensor<int8>(v[16])
            query(rq47) tensor<int8>(v[16])
            query(rq48) tensor<int8>(v[16])
            query(rq49) tensor<int8>(v[16])
            query(rq50) tensor<int8>(v[16])
            query(rq51) tensor<int8>(v[16])
            query(rq52) tensor<int8>(v[16])
            query(rq53) tensor<int8>(v[16])
            query(rq54) tensor<int8>(v[16])
            query(rq55) tensor<int8>(v[16])
            query(rq56) tensor<int8>(v[16])
            query(rq57) tensor<int8>(v[16])
            query(rq58) tensor<int8>(v[16])
            query(rq59) tensor<int8>(v[16])
            query(rq60) tensor<int8>(v[16])
            query(rq61) tensor<int8>(v[16])
            query(rq62) tensor<int8>(v[16])
            query(rq63) tensor<int8>(v[16])
        }

        # For every query token: dot product (sum over v) with each unpacked
        # patch vector, maximum over patches; then summed over query tokens.
        function max_sim() {
            expression {
                sum(
                    reduce(
                        sum(query(qt) * unpack_bits(attribute(embedding)), v),
                        max, patch
                    ),
                    querytoken
                )
            }
        }

        # Same max-over-patches / sum-over-tokens shape as max_sim, but the
        # per-pair score is 1 / (1 + hamming distance summed over v), computed
        # on the packed int8 vectors.
        function max_sim_binary() {
            expression {
                sum(
                    reduce(
                        1 / (1 + sum(hamming(query(qtb), attribute(embedding)), v)),
                        max, patch
                    ),
                    querytoken
                )
            }
        }

        first-phase {
            expression: max_sim_binary
        }

        second-phase {
            expression: max_sim
            rerank-count: 10
        }
    }
}