Synthesize tabular data
Use YData's RegularSynthesizer to generate tabular synthetic data.
For a more detailed tutorial, please check the YData Fabric Academy ydata-sdk notebooks.
import os
from ydata.sdk.dataset import get_dataset
from ydata.sdk.synthesizers import RegularSynthesizer
# The SDK token is supplied through the YDATA_TOKEN environment variable.
# setdefault only installs the placeholder when the variable is missing, so a
# token that is already exported is never overwritten by '<TOKEN>'.
os.environ.setdefault("YDATA_TOKEN", '<TOKEN>')  # Replace <TOKEN> with your token
def main():
    """Train a Regular Synthesizer on the 'census' dataset and sample from it.

    The dataset is obtained through ``get_dataset``, a ``RegularSynthesizer``
    is fitted on it, and the shape of a 50-row synthetic sample is printed.
    """
    census_data = get_dataset('census')

    # Until `fit` is called, the synthesizer exists only locally.
    synthesizer = RegularSynthesizer()

    # Train the synthesizer on the dataset.
    synthesizer.fit(census_data)

    # Request 50 synthetic rows and report the shape of the result.
    synthetic_rows = synthesizer.sample(n_samples=50)
    print(synthetic_rows.shape)
# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()