Skip to content

Secure Information Retrieval

Introduction

Secure Information Retrieval (SIR) securely retrieves target value(s) from the host. This module is based on Pohlig-Hellman commutative encryption and Hauck Oblivious Transfer (OT). It is still in the research stage and has not yet been put into production.

How to Use

This component can be used to retrieve specific feature value(s) or label value, with arbitrary security level.

Param

sir_param

Classes

SecureInformationRetrievalParam (BaseParam)
Parameters

security_level : float, default 0.5

security level; should be set to a value in [0, 1]. A security_level of 0.0 means raw data retrieval.

oblivious_transfer_protocol : {"OT_Hauck"}

OT type, only supports OT_Hauck

commutative_encryption : {"CommutativeEncryptionPohligHellman"} the commutative encryption scheme used

non_committing_encryption : {"aes"} the non-committing encryption scheme used

dh_params : params for Pohlig-Hellman Encryption

key_size : int, value >= 1024

the key length of the commutative cipher; note that this param will be deprecated in future, please specify key_length in PHParam instead.

raw_retrieval : bool

whether to perform raw retrieval

target_cols : str or list of str, default None

target cols to retrieve; any values not retrieved will be marked as "unretrieved". If target_cols is None, the label will be retrieved (same behavior as in the previous version). Default: None.

Source code in federatedml/param/sir_param.py
class SecureInformationRetrievalParam(BaseParam):
    """
    Parameter holder for the Secure Information Retrieval (SIR) component.

    Parameters
    ----------
    security_level: float, default 0.5
        security level, should set value in [0, 1]
        if security_level equals 0.0 means raw data retrieval

    oblivious_transfer_protocol: {"OT_Hauck"}
        OT type, only supports OT_Hauck

    commutative_encryption : {"CommutativeEncryptionPohligHellman"}
        the commutative encryption scheme used

    non_committing_encryption : {"aes"}
        the non-committing encryption scheme used

    dh_params
        params for Pohlig-Hellman Encryption;
        the object passed in is deep-copied, so each instance owns its own copy

    key_size: int, value >= 1024
        the key length of the commutative cipher;
        note that this param will be deprecated in future, please specify key_length in PHParam instead.

    raw_retrieval: bool
        whether to perform raw retrieval;
        handled as a deprecated parameter in ``check``

    target_cols: str or list of str
        target cols to retrieve;
        any values not retrieved will be marked as "unretrieved",
        if target_cols is None, label will be retrieved, same behavior as in previous version
        default None

    """

    def __init__(self, security_level=0.5,
                 oblivious_transfer_protocol=consts.OT_HAUCK,
                 commutative_encryption=consts.CE_PH,
                 non_committing_encryption=consts.AES,
                 key_size=consts.DEFAULT_KEY_LENGTH,
                 dh_params=DHParam(),
                 raw_retrieval=False,
                 target_cols=None):
        """Store the given values; validation is deferred to ``check``."""
        # Function-scope import so this block does not rely on a module-level import.
        import copy

        super(SecureInformationRetrievalParam, self).__init__()
        self.security_level = security_level
        self.oblivious_transfer_protocol = oblivious_transfer_protocol
        self.commutative_encryption = commutative_encryption
        self.non_committing_encryption = non_committing_encryption
        # The default DHParam() is built once, when the function is defined, and
        # check() writes key_length into self.dh_params. Copying keeps that write
        # from leaking into the shared default (or into a caller-owned object).
        self.dh_params = copy.deepcopy(dh_params)
        self.key_size = key_size
        self.raw_retrieval = raw_retrieval
        # None is normalised to an empty list; check() treats that as "retrieve label".
        self.target_cols = [] if target_cols is None else target_cols

    def check(self):
        """
        Validate all parameters and normalise some of them in place.

        The three scheme names are replaced by the value returned from
        ``check_and_change_lower``; ``target_cols`` is wrapped into a list
        when it is not one already.
        Error behavior is defined by the inherited ``check_*`` helpers and
        by ``dh_params.check()``.
        """
        descr = "secure information retrieval param's "
        self.check_decimal_float(self.security_level, descr + "security_level")
        self.oblivious_transfer_protocol = self.check_and_change_lower(self.oblivious_transfer_protocol,
                                                                       [consts.OT_HAUCK.lower()],
                                                                       descr + "oblivious_transfer_protocol")
        self.commutative_encryption = self.check_and_change_lower(self.commutative_encryption,
                                                                  [consts.CE_PH.lower()],
                                                                  descr + "commutative_encryption")
        self.non_committing_encryption = self.check_and_change_lower(self.non_committing_encryption,
                                                                     [consts.AES.lower()],
                                                                     descr + "non_committing_encryption")
        # Deprecated key_size: when the helper returns a truthy value, forward the
        # value to dh_params before dh_params validates itself.
        # NOTE(review): presumably truthy means "explicitly set by the user" - confirm in BaseParam.
        if self._warn_to_deprecate_param("key_size", descr, "dh_param's key_length"):
            self.dh_params.key_length = self.key_size
        self.dh_params.check()
        # NOTE(review): the replacement hint says "dh_param's security_level", but
        # security_level is an attribute of this class - confirm the intended wording.
        if self._warn_to_deprecate_param("raw_retrieval", descr, "dh_param's security_level = 0"):
            self.check_boolean(self.raw_retrieval, descr + "raw_retrieval")
        # Accept a single column name as shorthand for a one-element list.
        if not isinstance(self.target_cols, list):
            self.target_cols = [self.target_cols]
        for col in self.target_cols:
            self.check_string(col, descr + "target_cols")
        if not self.target_cols:
            LOGGER.warning("'target_cols' is empty. Label will be retrieved.")
__init__(self, security_level=0.5, oblivious_transfer_protocol='OT_Hauck', commutative_encryption='CommutativeEncryptionPohligHellman', non_committing_encryption='aes', key_size=1024, dh_params=<federatedml.param.intersect_param.DHParam object at 0x7f3a40bc7910>, raw_retrieval=False, target_cols=None) special
Source code in federatedml/param/sir_param.py
def __init__(self, security_level=0.5,
             oblivious_transfer_protocol=consts.OT_HAUCK,
             commutative_encryption=consts.CE_PH,
             non_committing_encryption=consts.AES,
             key_size=consts.DEFAULT_KEY_LENGTH,
             dh_params=DHParam(),
             raw_retrieval=False,
             target_cols=None):
    """Store the given parameter values on the instance without validating them."""
    # Function-scope import so this block does not rely on a module-level import.
    import copy

    super(SecureInformationRetrievalParam, self).__init__()
    self.security_level = security_level
    self.oblivious_transfer_protocol = oblivious_transfer_protocol
    self.commutative_encryption = commutative_encryption
    self.non_committing_encryption = non_committing_encryption
    # The default DHParam() is created once at definition time; copy it so that
    # later writes to self.dh_params do not affect other instances or the caller.
    self.dh_params = copy.deepcopy(dh_params)
    self.key_size = key_size
    self.raw_retrieval = raw_retrieval
    # None is normalised to an empty list.
    self.target_cols = [] if target_cols is None else target_cols
check(self)
Source code in federatedml/param/sir_param.py
def check(self):
    """
    Validate all parameters and normalise some of them in place.

    The three scheme names are replaced by the value returned from
    ``check_and_change_lower``; ``target_cols`` is wrapped into a list
    when it is not one already.
    Error behavior is defined by the ``check_*`` helpers and by
    ``dh_params.check()``.
    """
    descr = "secure information retrieval param's "
    self.check_decimal_float(self.security_level, descr + "security_level")
    self.oblivious_transfer_protocol = self.check_and_change_lower(self.oblivious_transfer_protocol,
                                                                   [consts.OT_HAUCK.lower()],
                                                                   descr + "oblivious_transfer_protocol")
    self.commutative_encryption = self.check_and_change_lower(self.commutative_encryption,
                                                              [consts.CE_PH.lower()],
                                                              descr + "commutative_encryption")
    self.non_committing_encryption = self.check_and_change_lower(self.non_committing_encryption,
                                                                 [consts.AES.lower()],
                                                                 descr + "non_committing_encryption")
    # Deprecated key_size: when the helper returns a truthy value, forward the
    # value to dh_params before dh_params validates itself.
    # NOTE(review): presumably truthy means "explicitly set by the user" - confirm.
    if self._warn_to_deprecate_param("key_size", descr, "dh_param's key_length"):
        self.dh_params.key_length = self.key_size
    self.dh_params.check()
    if self._warn_to_deprecate_param("raw_retrieval", descr, "dh_param's security_level = 0"):
        # Name the parameter in the description, as every other check here does.
        self.check_boolean(self.raw_retrieval, descr + "raw_retrieval")
    # Accept a single column name as shorthand for a one-element list.
    if not isinstance(self.target_cols, list):
        self.target_cols = [self.target_cols]
    for col in self.target_cols:
        self.check_string(col, descr + "target_cols")
    if not self.target_cols:
        LOGGER.warning("'target_cols' is empty. Label will be retrieved.")

Examples

Example
## Secure Information Retrieval Configuration Usage Guide.

This section introduces a python script for SIR task.

#### Secure Information Retrieval Task.

1. Secure Information Retrieval Task to Retrieve Selected Feature(s):
    script: secure-information-retrieval.py

Users can use the following command to run the task.

    python ${pipeline_script}
secure-information-retrieval.py
import argparse

from pipeline.backend.pipeline import PipeLine
from pipeline.component import Reader
from pipeline.component import DataTransform
from pipeline.component import SecureInformationRetrieval
from pipeline.interface import Data


from pipeline.utils.tools import load_job_config


def main(config="../../config.yaml", namespace=""):
    """
    Assemble and fit the SIR demo pipeline: reader -> data transform -> SIR.

    Parameters
    ----------
    config : str or loaded job config
        a string is passed to ``load_job_config``; any other value is used as-is
        and must expose ``parties.guest`` / ``parties.host``
    namespace : str
        suffix appended to the "experiment" table namespace
    """
    # Resolve the job configuration and the first guest / host party ids.
    job_config = load_job_config(config) if isinstance(config, str) else config
    parties = job_config.parties
    guest_id = parties.guest[0]
    host_id = parties.host[0]

    table_namespace = f"experiment{namespace}"
    guest_table = {"name": "breast_hetero_host", "namespace": table_namespace}
    host_table = {"name": "breast_hetero_guest", "namespace": table_namespace}

    # Pipeline skeleton: the guest initiates the job, both parties participate.
    sir_pipeline = PipeLine()
    sir_pipeline.set_initiator(role="guest", party_id=guest_id)
    sir_pipeline.set_roles(guest=guest_id, host=host_id)

    # Reader: each party reads its own table (guest first, then host).
    reader = Reader(name="reader_0")
    for role, party_id, table in (("guest", guest_id, guest_table),
                                  ("host", host_id, host_table)):
        reader.get_party_instance(role=role, party_id=party_id).component_param(table=table)

    # Data transform: the guest side has no label, the host side carries the label.
    transform = DataTransform(name="datatransform_0")
    guest_transform = transform.get_party_instance(role="guest", party_id=guest_id)
    guest_transform.component_param(with_label=False, output_format="dense")
    host_transform = transform.get_party_instance(role="host", party_id=host_id)
    host_transform.component_param(with_label=True)

    # SIR component configuration.
    sir_settings = dict(
        security_level=0.5,
        oblivious_transfer_protocol="OT_Hauck",
        commutative_encryption="CommutativeEncryptionPohligHellman",
        non_committing_encryption="aes",
        dh_params={"key_length": 1024},
        raw_retrieval=False,
        target_cols=["x0", "x3"],
    )
    sir = SecureInformationRetrieval(name="secure_information_retrieval_0", **sir_settings)

    # Components are added in execution order, each fed by its predecessor's output.
    sir_pipeline.add_component(reader)
    reader_output = Data(data=reader.output.data)
    sir_pipeline.add_component(transform, data=reader_output)
    transform_output = Data(data=transform.output.data)
    sir_pipeline.add_component(sir, data=transform_output)

    # Compiling forms the conf and dsl files for the job; fit then runs it.
    sir_pipeline.compile()
    sir_pipeline.fit()


if __name__ == "__main__":
    # Command-line entry: an optional -config path overrides main()'s default.
    arg_parser = argparse.ArgumentParser("PIPELINE DEMO")
    arg_parser.add_argument("-config", type=str, help="config file")
    cli_args = arg_parser.parse_args()
    if cli_args.config is None:
        main()
    else:
        main(cli_args.config)
secure_information_retrieval_testsuite.json
{
    "data": [
        {
            "file": "examples/data/breast_hetero_guest.csv",
            "head": 1,
            "partition": 16,
            "table_name": "breast_hetero_guest",
            "namespace": "experiment",
            "role": "host_0"
        },
        {
            "file": "examples/data/breast_hetero_host.csv",
            "head": 1,
            "partition": 16,
            "table_name": "breast_hetero_host",
            "namespace": "experiment",
            "role": "guest_0"
        }
    ],
    "pipeline_tasks": {
        "secure-information-retrieval": {
            "script": "./secure-information-retrieval.py"
        }
    }
}
## Secure Information Retrieval Configuration Usage Guide.

 This section introduces the dsl and conf for SIR task.

1. Secure Information Retrieval Task to Retrieve Selected Feature(s):

    dsl: test_secure_information_retrieval_dsl.json

    runtime_config : test_secure_information_retrieval_conf.json

 Users can use the following command to run the task.

     flow -f submit_job -c ${runtime_config} -d ${dsl}        
test_secure_information_retrieval_conf.json
{
    "dsl_version": 2,
    "initiator": {
        "role": "guest",
        "party_id": 9999
    },
    "role": {
        "host": [
            9998
        ],
        "guest": [
            9999
        ]
    },
    "component_parameters": {
        "role": {
            "guest": {
                "0": {
                    "reader_0": {
                        "table": {
                            "name": "breast_hetero_host",
                            "namespace": "experiment"
                        }
                    },
                    "data_transform_0": {
                        "with_label": false
                    }
                }
            },
            "host": {
                "0": {
                    "reader_0": {
                        "table": {
                            "name": "breast_hetero_guest",
                            "namespace": "experiment"
                        }
                    },
                    "data_transform_0": {
                        "with_label": true
                    }
                }
            }
        },
        "common": {
            "secure_information_retrieval_0": {
                "security_level": 0.5,
                "oblivious_transfer_protocol": "OT_Hauck",
                "commutative_encryption": "CommutativeEncryptionPohligHellman",
                "non_committing_encryption": "aes",
                "dh_params": {
                    "key_length": 1024
                },
                "raw_retrieval": false,
                "target_cols": [
                    "x0",
                    "x3"
                ]
            }
        }
    }
}            
test_secure_information_retrieval_dsl.json
{
    "components": {
        "reader_0": {
            "module": "Reader",
            "output": {
                "data": [
                    "data"
                ]
            }
        },
        "data_transform_0": {
            "module": "DataTransform",
            "input": {
                "data": {
                    "data": [
                        "reader_0.data"
                    ]
                }
            },
            "output": {
                "data": [
                    "data"
                ],
                "model": [
                    "model"
                ]
            }
        },
        "secure_information_retrieval_0": {
            "module": "SecureInformationRetrieval",
            "input": {
                "data": {
                    "data": [
                        "data_transform_0.data"
                    ]
                }
            },
            "output": {
                "data": [
                    "data"
                ],
                "model": [
                    "model"
                ]
            }
        }
    }
}            
secure_information_retrieval_testsuite.json
{
    "data": [
        {
            "file": "examples/data/breast_hetero_guest.csv",
            "head": 1,
            "partition": 16,
            "table_name": "breast_hetero_guest",
            "namespace": "experiment",
            "role": "host_0"
        },
        {
            "file": "examples/data/breast_hetero_host.csv",
            "head": 1,
            "partition": 16,
            "table_name": "breast_hetero_host",
            "namespace": "experiment",
            "role": "guest_0"
        }
    ],
    "tasks": {
        "secure-information-retrieval": {
            "conf": "test_secure_information_retrieval_conf.json",
            "dsl": "test_secure_information_retrieval_dsl.json"
        }
    }
}            

Last update: 2022-02-16